diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1aeb606b3f109b11fb5a34a1c77156ab35d49b4c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +VnCoreNLP/VnCoreNLP-1.1.1.jar filter=lfs diff=lfs merge=lfs -text +VnCoreNLP/VnCoreNLP-1.2.jar filter=lfs diff=lfs merge=lfs -text +VnCoreNLP/models/postagger/vi-tagger filter=lfs diff=lfs merge=lfs -text +checkpoints/ir/ir_2.bak filter=lfs diff=lfs merge=lfs -text +src/sts/build/lib.linux-x86_64-cpython-38/detectron2/_C.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o filter=lfs diff=lfs merge=lfs -text +src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o filter=lfs diff=lfs merge=lfs -text +src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o filter=lfs diff=lfs merge=lfs -text +src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o filter=lfs diff=lfs merge=lfs -text +src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o filter=lfs diff=lfs merge=lfs -text +src/sts/detectron2/_C.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +src/sts/dist/detectron2-0.4-py3.8-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..9e0ee1491ec9158e6df8eda7c6573dde3688b3b5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,145 @@ +# outputs +outputs/* + +# uploads +static/uploads/*.* +static/images/*.* +static/videos/*/ + +# logs +logs +tensorboard +lightning_logs + +# checkpoints +checkpoints/*/*.* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/README.md b/README.md
index c070c7cc2d1b5b56cf9bf29c150b178a78a7a325..e81075cd47c3d16f29d35143bb8c7de30cec36bc 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,33 @@
 ---
-title: Poi Engineering
-emoji: 🔥
-colorFrom: purple
-colorTo: indigo
-sdk: gradio
-sdk_version: 4.7.1
+title: poi_Engineering
 app_file: app.py
-pinned: false
+sdk: gradio
+sdk_version: 3.50.2
 ---
+# Pipeline_POI_Engineering
+Repository for signboard segmentation and text recognition.
+
+## Download pretrained models:
+
+- ss: signboard segmentation
+- str: scene text recognition
+- ir: information retrieval
+
+All the checkpoints are available here: [poi_engineering_checkpoints](https://drive.google.com/drive/folders/1mY5RWhaxNAFiAX-rROZSnJtd1npVB48U?usp=sharing)
+
+For each model, copy its checkpoint into the directory of the same name (ir --> ir; ss --> ss; str --> str).
+
+## Run:
+Set up the environment:
+```sh
+bash setup.sh
+```
+Extract text:
+```sh
+bash extract.sh
+```
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Image parser
+```
+curl -F "image=@/home/trungtv/sources/POI_Engineering/outputs/20220520_150925/frames/frames_0.jpg" http://localhost:5050/parse_image
+```
diff --git a/README_.md b/README_.md
new file mode 100644
index 0000000000000000000000000000000000000000..b3e525f397ac1f3d0ab4271bf8c8bcc714b93490
--- /dev/null
+++ b/README_.md
@@ -0,0 +1,7 @@
+### Installation
+
+```
+conda create -n poi_engineering python=3.7
+conda activate poi_engineering
+
+```
\ No newline at end of file
diff --git a/VnCoreNLP/LICENSE.md b/VnCoreNLP/LICENSE.md
new file mode 100644
index 0000000000000000000000000000000000000000..ac6bda3406ba2fdb9ea50f14435b4e03e96d36f3
--- /dev/null
+++ b/VnCoreNLP/LICENSE.md
@@ -0,0 +1,14 @@
+ Copyright (C) 2018-2019 vncorenlp
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
diff --git a/VnCoreNLP/Readme.md b/VnCoreNLP/Readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..e4ad4a76cdd78dc27becf2e781527dfa50be8785
--- /dev/null
+++ b/VnCoreNLP/Readme.md
@@ -0,0 +1,136 @@
+#### Table of contents
+1. [Introduction](#introduction)
+2. [Installation](#install)
+3. [Usage for Python users](#python)
+4. [Usage for Java users](#java)
+5. [Experimental results](#exp)
+
+# VnCoreNLP: A Vietnamese natural language processing toolkit
+
+VnCoreNLP is a **fast and accurate** NLP annotation pipeline for Vietnamese, providing rich linguistic annotations through key NLP components of **word segmentation**, **POS tagging**, **named entity recognition** (NER) and **dependency parsing**. Users do not have to install external dependencies, and processing pipelines can be run from either the command line or the API. The general architecture and experimental results of VnCoreNLP can be found in the following related papers:
+
+1. Thanh Vu, Dat Quoc Nguyen, Dai Quoc Nguyen, Mark Dras and Mark Johnson. **2018**. [VnCoreNLP: A Vietnamese Natural Language Processing Toolkit](http://aclweb.org/anthology/N18-5012). In *Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations*, [NAACL 2018](http://naacl2018.org), pages 56-60. [[.bib]](http://aclweb.org/anthology/N18-5012.bib)
+2. Dat Quoc Nguyen, Dai Quoc Nguyen, Thanh Vu, Mark Dras and Mark Johnson. **2018**. [A Fast and Accurate Vietnamese Word Segmenter](http://www.lrec-conf.org/proceedings/lrec2018/summaries/55.html). In *Proceedings of the 11th International Conference on Language Resources and Evaluation*, [LREC 2018](http://lrec2018.lrec-conf.org/en/), pages 2582-2587. [[.bib]](https://dblp.uni-trier.de/rec/bibtex/conf/lrec/NguyenNVDJ18)
+3. Dat Quoc Nguyen, Thanh Vu, Dai Quoc Nguyen, Mark Dras and Mark Johnson. **2017**. [From Word Segmentation to POS Tagging for Vietnamese](http://aclweb.org/anthology/U17-1013). In *Proceedings of the 15th Annual Workshop of the Australasian Language Technology Association*, [ALTA 2017](http://alta2017.alta.asn.au), pages 108-113. [[.bib]](http://aclweb.org/anthology/U17-1013.bib)
+
+Please **CITE** paper [1] whenever VnCoreNLP is used to produce published results or incorporated into other software. If you are dealing in depth with either word segmentation or POS tagging, you are also encouraged to cite paper [2] or [3], respectively.
+
+If you are looking for lightweight versions, VnCoreNLP's word segmentation and POS tagging components have also been released as independent packages [RDRsegmenter](https://github.com/datquocnguyen/RDRsegmenter) [2] and [VnMarMoT](https://github.com/datquocnguyen/VnMarMoT) [3], respectively.
+
+
+## Installation
+
+- `Java 1.8+` (Prerequisite)
+- File `VnCoreNLP-1.2.jar` (27MB) and folder `models` (115MB) must be placed in the same working folder.
+- `Python 3.6+` if using [a Python wrapper of VnCoreNLP](https://github.com/thelinhbkhn2014/VnCoreNLP_Wrapper). To install this wrapper, users have to run the following command:
+
+    `$ pip3 install py_vncorenlp`
+
+    _A special thanks goes to [Linh The Nguyen](https://github.com/thelinhbkhn2014) for creating this wrapper!_
+
+
+## Usage for Python users
+
+```python
+import py_vncorenlp
+
+# Automatically download VnCoreNLP components from the original repository
+# and save them in some local working folder
+py_vncorenlp.download_model(save_dir='/absolute/path/to/vncorenlp')
+
+# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.2.jar` and `models`
+model = py_vncorenlp.VnCoreNLP(save_dir='/absolute/path/to/vncorenlp')
+# Equivalent to: model = py_vncorenlp.VnCoreNLP(annotators=["wseg", "pos", "ner", "parse"], save_dir='/absolute/path/to/vncorenlp')
+
+# Annotate a raw corpus
+model.annotate_file(input_file="/absolute/path/to/input/file", output_file="/absolute/path/to/output/file")
+
+# Annotate a raw text
+model.print_out(model.annotate_text("Ông Nguyễn Khắc Chúc đang làm việc tại Đại học Quốc gia Hà Nội. Bà Lan, vợ ông Chúc, cũng làm việc tại đây."))
+```
+
+By default, the output is formatted with 6 columns representing word index, word form, POS tag, NER label, head index of the current word and its dependency relation type:
+
+```
+1   Ông                 Nc   O       4   sub
+2   Nguyễn_Khắc_Chúc    Np   B-PER   1   nmod
+3   đang                R    O       4   adv
+4   làm_việc            V    O       0   root
+5   tại                 E    O       4   loc
+6   Đại_học             N    B-ORG   5   pob
+...
+```
+
+For users who use VnCoreNLP only for word segmentation:
+
+```python
+rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir='/absolute/path/to/vncorenlp')
+text = "Ông Nguyễn Khắc Chúc đang làm việc tại Đại học Quốc gia Hà Nội. Bà Lan, vợ ông Chúc, cũng làm việc tại đây."
+output = rdrsegmenter.word_segment(text)
+print(output)
+# ['Ông Nguyễn_Khắc_Chúc đang làm_việc tại Đại_học Quốc_gia Hà_Nội .', 'Bà Lan , vợ ông Chúc , cũng làm_việc tại đây .']
+```
+
+
+## Usage for Java users
+
+### Using VnCoreNLP from the command line
+
+You can run VnCoreNLP to annotate an input raw text corpus (e.g. a collection of news content) by using the following commands:
+
+    // To perform word segmentation, POS tagging, NER and then dependency parsing
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt
+    // To perform word segmentation, POS tagging and then NER
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
+    // To perform word segmentation and then POS tagging
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos
+    // To perform word segmentation
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg
+
+
+### Using VnCoreNLP from the API
+
+The following code is a simple and complete example:
+
+```java
+import vn.pipeline.*;
+import java.io.*;
+public class VnCoreNLPExample {
+    public static void main(String[] args) throws IOException {
+
+        // "wseg", "pos", "ner", and "parse" refer to word segmentation, POS tagging, NER and dependency parsing, respectively.
+        String[] annotators = {"wseg", "pos", "ner", "parse"};
+        VnCoreNLP pipeline = new VnCoreNLP(annotators);
+
+        String str = "Ông Nguyễn Khắc Chúc đang làm việc tại Đại học Quốc gia Hà Nội. Bà Lan, vợ ông Chúc, cũng làm việc tại đây.";
+
+        Annotation annotation = new Annotation(str);
+        pipeline.annotate(annotation);
+
+        System.out.println(annotation.toString());
+        // 1   Ông                 Nc   O       4   sub
+        // 2   Nguyễn_Khắc_Chúc    Np   B-PER   1   nmod
+        // 3   đang                R    O       4   adv
+        // 4   làm_việc            V    O       0   root
+        // ...
+
+        // Write to file
+        PrintStream outputPrinter = new PrintStream("output.txt");
+        pipeline.printToFile(annotation, outputPrinter);
+
+        // You can also get a single sentence to analyze individually
+        Sentence firstSentence = annotation.getSentences().get(0);
+        System.out.println(firstSentence.toString());
+    }
+}
+```
+
+*(Screenshot: vncorenlpexample)*
+
+See VnCoreNLP's source code in the `src` folder for API details.
+
+## Experimental results
+
+See details in papers [1,2,3] above or at [NLP-progress](http://nlpprogress.com/vietnamese/vietnamese.html).
+
diff --git a/VnCoreNLP/TagsetDescription.md b/VnCoreNLP/TagsetDescription.md
new file mode 100644
index 0000000000000000000000000000000000000000..6d131b6459c6191004bf32e8a84969c83a56943d
--- /dev/null
+++ b/VnCoreNLP/TagsetDescription.md
@@ -0,0 +1,67 @@
+## POS tags, NER types and dependency labels in VnCoreNLP
+
+The following sections briefly describe the [POS tags](https://github.com/vncorenlp/VnCoreNLP/blob/master/VLSP2013_POS_tagset.pdf), [NER types](http://vlsp.org.vn/vlsp2016/eval/ner) and [dependency labels](https://github.com/vncorenlp/VnCoreNLP/blob/master/VnDT-treebank-description.pdf) used in VnCoreNLP. See [Link-to-POS-tag-description](https://github.com/vncorenlp/VnCoreNLP/blob/master/VLSP2013_POS_tagset.pdf), [Link-to-NER-type-description](http://vlsp.org.vn/vlsp2016/eval/ner) and [Link-to-dependency-label-description](https://github.com/vncorenlp/VnCoreNLP/blob/master/VnDT-treebank-description.pdf) for full details.
+
+### POS tags
+
+| Label | Meaning |
+|---|---|
+| Np | Proper noun |
+| Nc | Classifier noun |
+| Nu | Unit noun |
+| N | Noun |
+| Ny | Abbreviated noun |
+| Nb | (Foreign) borrowed noun |
+| V | Verb |
+| Vb | (Foreign) borrowed verb |
+| A | Adjective |
+| P | Pronoun |
+| R | Adverb |
+| L | Determiner |
+| M | Numeral/Quantity |
+| E | Preposition |
+| C | Subordinating conjunction |
+| Cc | Coordinating conjunction |
+| I | Interjection/Exclamation |
+| T | Particle/Auxiliary, modal words |
+| Y | Abbreviation |
+| Z | Bound morpheme |
+| X | Un-definition/Other |
+| CH | Punctuation and symbols |
+
+### NER types
+
+| Label | Meaning |
+|---|---|
+| PER | Names of persons |
+| LOC | Names of locations |
+| ORG | Names of organizations |
+| MISC | Names of miscellaneous entities |
+
+### Top 21 most frequent dependency labels
+
+The following labels have an appearance rate of at least 0.2%:
+
+| Label | Meaning |
+|---|---|
+| adv | Adverbial |
+| amod | Adjective modifier |
+| conj | Conjunction |
+| coord | Coordination |
+| dep | Default label |
+| det | Determiner |
+| dir | Direction |
+| dob | Direct object |
+| iob | Indirect object |
+| loc | Location |
+| mnr | Manner |
+| nmod | Noun modifier |
+| pmod | Prepositional modifier |
+| pob | Object of a preposition |
+| prd | Predicate |
+| prp | Purpose |
+| punct | Punctuation |
+| root | Root |
+| sub | Subject |
+| tmp | Temporal |
+| vmod | Verb modifier |
diff --git a/VnCoreNLP/VLSP2013_POS_tagset.pdf b/VnCoreNLP/VLSP2013_POS_tagset.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..bb5150a782221779d0f32e1e92b18b69a16068e7
Binary files /dev/null and b/VnCoreNLP/VLSP2013_POS_tagset.pdf differ
diff --git a/VnCoreNLP/VnCoreNLP-1.1.1.jar b/VnCoreNLP/VnCoreNLP-1.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4767a2c5fd541acbf0f15cf42b762a8d97b8292f
--- /dev/null
+++ b/VnCoreNLP/VnCoreNLP-1.1.1.jar
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c356b2baa0b83a287642b29d5c2ec5e9558c84d1c937f0aa88a5eea8748e587e
+size 27412575
diff --git a/VnCoreNLP/VnCoreNLP-1.2.jar
b/VnCoreNLP/VnCoreNLP-1.2.jar new file mode 100644 index 0000000000000000000000000000000000000000..4420d66fddafb1f71f1787e54fcf6b164829f508 --- /dev/null +++ b/VnCoreNLP/VnCoreNLP-1.2.jar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e2811cdbc2ddfc71d04be5dc36e185c88dcd1ad4d5d69e4ff2e1369dccf7793 +size 27412703 diff --git a/VnCoreNLP/VnDT-treebank-description.pdf b/VnCoreNLP/VnDT-treebank-description.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0dad20b312ea306374172046b87d92ceea63dc70 Binary files /dev/null and b/VnCoreNLP/VnDT-treebank-description.pdf differ diff --git a/VnCoreNLP/models/dep/vi-dep.xz b/VnCoreNLP/models/dep/vi-dep.xz new file mode 100644 index 0000000000000000000000000000000000000000..069c542266bc456b8d4f4f0b143267863ea98db2 --- /dev/null +++ b/VnCoreNLP/models/dep/vi-dep.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:266e4a3a55d5edd1607d5f036c2f95b70c0a6c80f58b57fd9962677a6ef331b7 +size 16048864 diff --git a/VnCoreNLP/models/ner/vi-500brownclusters.xz b/VnCoreNLP/models/ner/vi-500brownclusters.xz new file mode 100644 index 0000000000000000000000000000000000000000..2686b3132b1bd8f77ac699672f0e49a09348c3f0 --- /dev/null +++ b/VnCoreNLP/models/ner/vi-500brownclusters.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30f9cfdf0af193a69e185d1acda0306a9fbe1321f8a700f7c66557a90f92b8c +size 5599844 diff --git a/VnCoreNLP/models/ner/vi-ner.xz b/VnCoreNLP/models/ner/vi-ner.xz new file mode 100644 index 0000000000000000000000000000000000000000..0943737682b9f0436d9bd3346d145c914a3c274a --- /dev/null +++ b/VnCoreNLP/models/ner/vi-ner.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04c5e752d7f99a6313b758fc2607a2c3906e58b1d60a37eb0192aead73d61f7 +size 9956876 diff --git a/VnCoreNLP/models/ner/vi-pretrainedembeddings.xz b/VnCoreNLP/models/ner/vi-pretrainedembeddings.xz new file mode 100644 index 0000000000000000000000000000000000000000..652f01e615707133378610d445b48a33c5fbb13c --- /dev/null +++ b/VnCoreNLP/models/ner/vi-pretrainedembeddings.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00d3d034f1b23a8bfe5168195741fde845808c212e6dfcd4c94bead1665eb0fc +size 57313672 diff --git a/VnCoreNLP/models/postagger/vi-tagger b/VnCoreNLP/models/postagger/vi-tagger new file mode 100644 index 0000000000000000000000000000000000000000..7598249c6ee230668d152befe5979a2b74fdff50 --- /dev/null +++ b/VnCoreNLP/models/postagger/vi-tagger @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95608a5982db89c11353b451154ec396eccc0ff1f5b22874935ecdf4e0ace01 +size 29709468 diff --git a/VnCoreNLP/models/wordsegmenter/vi-vocab b/VnCoreNLP/models/wordsegmenter/vi-vocab new file mode 100644 index 0000000000000000000000000000000000000000..43f9e79dabc510783eb75bb410cbeeb7eeb1678e Binary files /dev/null and b/VnCoreNLP/models/wordsegmenter/vi-vocab differ diff --git a/VnCoreNLP/models/wordsegmenter/wordsegmenter.rdr b/VnCoreNLP/models/wordsegmenter/wordsegmenter.rdr new file mode 100644 index 0000000000000000000000000000000000000000..95cd89879fea6aaca97020228a7c63dfd3cf14de --- /dev/null +++ b/VnCoreNLP/models/wordsegmenter/wordsegmenter.rdr @@ -0,0 +1,1447 @@ +True : object.conclusion = "NN" + object.tag == "I" : object.conclusion = "I" + object.prevWord1 == "quận" : object.conclusion = "B" + object.prevWord1 == "quận" and object.word == "huyện" : object.conclusion = "I" + object.prevWord1 == "quận" and object.word == "uỷ" : 
object.conclusion = "I" + object.prevWord1 == "người" and object.word == "ta" : object.conclusion = "B" + object.word == "ta" and object.nextWord1 == "chia" and object.nextWord2 == "đất" : object.conclusion = "I" + object.word == "ta" and object.nextWord2 == "bảo" : object.conclusion = "I" + object.prevWord1 == "con" and object.word == "gái" : object.conclusion = "B" + object.prevWord1 == "chủ" and object.word == "đầu" : object.conclusion = "B" + object.prevWord2 == "chất" and object.prevWord1 == "độc" and object.word == "da" : object.conclusion = "B" + object.prevWord1 == "tái" and object.word == "định" and object.nextWord1 == "cư" : object.conclusion = "B" + object.word == "định" and object.nextTag1 == "I" and object.nextTag2 == "" : object.conclusion = "I" + object.prevWord1 == "thứ" and object.word == "hai" : object.conclusion = "B" + object.nextWord2 == "tư" : object.conclusion = "I" + object.prevWord1 == "thì" : object.conclusion = "B" + object.prevWord1 == "thì" and object.word == "thầm" : object.conclusion = "I" + object.prevWord2 == "" and object.prevWord1 == "thì" and object.word == "ra" : object.conclusion = "I" + object.prevWord1 == "thì" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord2 == "có" and object.prevWord1 == "thì" and object.word == "giờ" : object.conclusion = "I" + object.prevWord2 == "," and object.word == "ra" : object.conclusion = "I" + object.prevWord2 == "ra" and object.word == "có" : object.conclusion = "I" + object.word == "thào" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "con" and object.word == "trai" : object.conclusion = "B" + object.prevWord1 == "phó" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "mặc" : object.conclusion = "I" + object.word == "thác" : object.conclusion = "I" + object.word == "tiến" and object.nextWord2 == "và" : object.conclusion = "I" + object.word == "vn" : object.conclusion = "B" + object.prevWord1 == "khu" and object.nextWord1 == "nghiệp" : object.conclusion = "B" + object.prevWord1 == "ty" : object.conclusion = "B" + object.prevWord1 == "trên" and object.word == "cơ" : object.conclusion = "B" + object.prevTag2 == "I" and object.prevTag1 == "B" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "nghĩa" and object.word == "là" : object.conclusion = "I" + object.prevWord1 == "lơ" and object.word == "là" : object.conclusion = "I" + object.word == "là" and object.nextWord1 == "chọn" : object.conclusion = "I" + object.prevWord1 == "hay" and object.nextWord1 == "sự" : object.conclusion = "I" + object.word == "là" and object.nextWord1 == "nhân" : object.conclusion = "I" + object.nextWord1 == "chết" : object.conclusion = "I" + object.word == "là" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "tức" and object.word == "là" : object.conclusion = "I" + object.word == "là" and object.nextWord2 == "thành" : object.conclusion = "I" + object.prevWord1 == "thứ" and object.word == "ba" : object.conclusion = "B" + object.prevWord1 == "thứ" and object.word == "ba" and object.nextWord1 == "và" : object.conclusion = "I" + object.prevWord2 == "bắt" and object.prevWord1 == "đầu" and object.word == "từ" : object.conclusion = "B" + object.nextWord2 == "2002" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "đô" and object.nextTag1 == "I" : object.conclusion = "B" + object.prevWord1 == "người" and object.word == "ở" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "giá" : 
object.conclusion = "B" + object.nextWord2 == "lẻ" : object.conclusion = "I" + object.nextWord2 == "đồng" : object.conclusion = "I" + object.word == "điều" and object.nextWord1 == "kiện" : object.conclusion = "B" + object.prevWord1 == "vô" and object.nextWord1 == "kiện" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "gì" and object.nextTag1 == "B" : object.conclusion = "B" + object.prevWord1 == "huống" and object.word == "gì" : object.conclusion = "I" + object.nextWord1 == "!" : object.conclusion = "I" + object.prevWord2 == "," and object.prevWord1 == "nói" and object.word == "gì" : object.conclusion = "I" + object.word == "gì" and object.nextWord2 == "ruột" : object.conclusion = "I" + object.word == "gì" and object.nextWord1 == "," and object.nextWord2 == "nhưng" : object.conclusion = "I" + object.prevWord1 == "làm" and object.word == "gì" and object.nextWord1 == "''" : object.conclusion = "I" + object.prevWord1 == "thiếu" : object.conclusion = "I" + object.word == "gì" and object.nextWord1 == "không" : object.conclusion = "I" + object.prevWord1 == "hèn" and object.word == "gì" : object.conclusion = "I" + object.prevWord1 == "được" : object.conclusion = "B" + object.nextWord1 == "," : object.conclusion = "I" + object.prevWord2 == "nhưng" and object.prevWord1 == "được" and object.word == "cái" : object.conclusion = "I" + object.prevWord1 == "được" and object.nextWord1 == "thì" : object.conclusion = "I" + object.prevTag2 == "" and object.prevTag1 == "B" : object.conclusion = "I" + object.prevTag2 == "I" and object.prevTag1 == "B" and object.word == "mùa" : object.conclusion = "I" + object.prevWord1 == "được" and object.word == "việc" and object.nextWord1 == "''" : object.conclusion = "I" + object.prevWord2 == "muốn" and object.prevWord1 == "được" and object.word == "việc" : object.conclusion = "I" + object.prevWord1 == "được" and object.word == "mùa" and object.nextWord1 == "to" : object.conclusion = "I" + object.nextWord2 == "là" : object.conclusion = "I" + object.prevWord1 == "người" and object.word == "làm" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "nghĩa" : object.conclusion = "B" + object.prevWord2 == "thiếu" and object.word == "tự" : object.conclusion = "B" + object.prevWord1 == "ba" and object.word == "tháng" : object.conclusion = "B" + object.prevWord1 == "ba" and object.nextWord1 == "hai" : object.conclusion = "I" + object.prevWord1 == "trưởng" and object.word == "phòng" : object.conclusion = "B" + object.prevTag2 == "" and object.prevTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "cục" and object.word == "trưởng" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "chủ" : object.conclusion = "B" + object.word == "chủ" and object.nextWord1 == "," : object.conclusion = "I" + object.word == "chủ" and object.nextWord2 == "thức" : object.conclusion = "I" + object.prevWord1 == "làm" and object.word == "chủ" and object.nextWord1 == "tình" : object.conclusion = "I" + object.prevWord1 == "trong" and object.word == "suốt" : object.conclusion = "B" + object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "làm" and object.word == "công" : object.conclusion = "B" + object.nextWord1 == "trừ" : object.conclusion = "I" + object.prevWord1 == "anh" and object.word == "ta" : object.conclusion = "B" + object.word == "chiến" and object.nextWord1 == "tranh" : object.conclusion = "B" + object.prevWord1 == "phạm" and object.word == "chiến" : object.conclusion = "I" + object.word == "dân" and 
object.nextWord1 == "tộc" : object.conclusion = "B" + object.word == "không" and object.nextTag1 == "B" and object.nextTag2 == "B" : object.conclusion = "B" + object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "phòng" : object.conclusion = "I" + object.prevWord2 == "ty" and object.prevWord1 == "hàng" and object.word == "không" : object.conclusion = "I" + object.prevWord2 == "cảng" and object.word == "không" : object.conclusion = "I" + object.word == "không" and object.nextWord2 == "đường" : object.conclusion = "I" + object.word == "không" and object.nextWord1 == "bắt" : object.conclusion = "I" + object.prevWord2 == "hãng" and object.prevWord1 == "hàng" and object.word == "không" : object.conclusion = "I" + object.prevWord2 == "tôn" and object.word == "không" : object.conclusion = "I" + object.nextWord2 == "lên" : object.conclusion = "I" + object.word == "không" and object.nextWord1 == "sẽ" : object.conclusion = "I" + object.nextWord1 == "ở" : object.conclusion = "I" + object.prevWord2 == "gia" and object.prevWord1 == "hàng" and object.word == "không" : object.conclusion = "I" + object.word == "sau" and object.nextTag1 == "B" : object.conclusion = "B" + object.prevWord1 == "trước" : object.conclusion = "I" + object.prevWord1 == "mai" and object.nextWord1 == "." : object.conclusion = "I" + object.prevWord2 == "vàng" : object.conclusion = "I" + object.prevWord2 == "mình" : object.conclusion = "I" + object.word == "sau" and object.nextWord1 == "chuyển" : object.conclusion = "I" + object.word == "sau" and object.nextWord1 == "cũng" : object.conclusion = "I" + object.prevWord1 == "ông" and object.word == "ta" : object.conclusion = "B" + object.prevWord1 == "vụ" and object.word == "trưởng" : object.conclusion = "B" + object.prevWord1 == "anh" and object.word == "ấy" : object.conclusion = "B" + object.prevWord1 == "thuộc" and object.word == "địa" : object.conclusion = "B" + object.prevWord1 == "thuộc" and object.nextWord1 == "pháp" : object.conclusion = "I" + object.prevWord2 == "trường" : object.conclusion = "I" + object.prevWord2 == "nước" and object.prevWord1 == "thuộc" and object.word == "địa" : object.conclusion = "I" + object.prevWord2 == "thời" and object.prevWord1 == "thuộc" and object.word == "địa" : object.conclusion = "I" + object.prevWord1 == "tiền" and object.nextWord1 == "dụng" : object.conclusion = "B" + object.prevWord1 == "như" and object.nextWord1 == "này" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "bằng" : object.conclusion = "B" + object.prevWord1 == "viện" and object.word == "trưởng" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "của" : object.conclusion = "B" + object.word == "của" and object.nextWord1 == "nhưng" and object.nextWord2 == "được" : object.conclusion = "I" + object.nextTag2 == "" : object.conclusion = "I" + object.word == "của" and object.nextWord1 == "nên" : object.conclusion = "I" + object.prevWord1 == "tính" and object.word == "từ" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "tình" : object.conclusion = "B" + object.prevWord1 == "nữa" and object.word == "là" : object.conclusion = "B" + object.word == "là" and object.nextWord1 == "..." 
and object.nextWord2 == "''" : object.conclusion = "I" + object.prevWord1 == "mà" and object.word == "cả" : object.conclusion = "B" + object.word == "cả" and object.nextWord1 == "gia" : object.conclusion = "I" + object.prevWord1 == "khu" and object.word == "tập" : object.conclusion = "B" + object.prevWord1 == "máy" and object.word == "điện" : object.conclusion = "B" + object.prevWord2 == "cho" : object.conclusion = "I" + object.prevWord1 == "con" and object.word == "heo" : object.conclusion = "B" + object.prevWord1 == "thứ" and object.word == "tư" : object.conclusion = "B" + object.nextWord2 == "tháng" : object.conclusion = "I" + object.prevWord1 == "nhà" and object.word == "chung" : object.conclusion = "B" + object.word == "biên" and object.nextWord1 == "giới" : object.conclusion = "B" + object.prevWord1 == "từ" and object.word == "đường" : object.conclusion = "B" + object.prevWord1 == "khu" and object.word == "chế" and object.nextWord1 == "xuất" : object.conclusion = "B" + object.nextWord2 == "rộng" : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "I" and object.word == "trưởng" : object.conclusion = "B" + object.prevWord2 == "tham" and object.word == "trưởng" : object.conclusion = "I" + object.prevWord2 == "kế" and object.word == "trưởng" : object.conclusion = "I" + object.prevWord1 == "để" and object.word == "trở" : object.conclusion = "B" + object.word == "xe" and object.nextWord1 == "bò" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "được" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "hôm" and object.nextTag1 == "B" : object.conclusion = "B" + object.word == "hôm" and object.nextWord1 == "sau" and object.nextWord2 == "(" : object.conclusion = "I" + object.prevWord1 == "gọi" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "gọi" and object.word == "là" and object.nextWord1 == "hang" : object.conclusion = "I" + object.prevWord1 == "gọi" and object.word == "là" and object.nextWord1 == "luật" : object.conclusion = "I" + object.prevWord2 == "giống" : object.conclusion = "I" + object.word == "là" and object.nextWord1 == "sự" and object.nextWord2 == "nhiễm" : object.conclusion = "I" + object.nextWord2 == "được" : object.conclusion = "I" + object.word == "là" and object.nextWord2 == "môn" : object.conclusion = "I" + object.prevWord2 == "anh" and object.prevWord1 == "gọi" and object.word == "là" : object.conclusion = "I" + object.nextWord2 == "độc" : object.conclusion = "I" + object.word == "là" and object.nextWord2 == "thủ" : object.conclusion = "I" + object.nextWord2 == ";" : object.conclusion = "I" + object.word == "cuộc" and object.nextWord1 == "sống" : object.conclusion = "B" + object.word == "việt" and object.nextWord1 == "nam" : object.conclusion = "B" + object.prevWord2 == "lớn" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "chánh" and object.word == "văn" and object.nextWord1 == "phòng" : object.conclusion = "B" + object.word == "vật" and object.nextWord1 == "chất" : object.conclusion = "B" + object.word == "chi" and object.nextWord1 == "phí" : object.conclusion = "B" + object.prevWord2 == "thứ" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "chuyển" and object.word == "tiền" : object.conclusion = "B" + object.prevWord2 == "thư" and object.prevWord1 == "chuyển" and object.word == "tiền" : object.conclusion = "I" + object.prevWord2 == "nhiều" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "lên" and object.word == 
"mặt" : object.conclusion = "B" + object.word == "thực" and object.nextWord1 == "hiện" : object.conclusion = "B" + object.prevWord1 == "nên" and object.word == "người" : object.conclusion = "B" + object.prevWord1 == "cao" and object.word == "trình" and object.nextWord1 == "độ" : object.conclusion = "B" + object.prevWord1 == "cửa" and object.word == "nhà" : object.conclusion = "B" + object.prevWord1 == "hãng" : object.conclusion = "B" + object.prevWord1 == "xoá" and object.word == "mù" : object.conclusion = "B" + object.prevWord1 == "nguyên" and object.word == "chủ" : object.conclusion = "B" + object.prevWord1 == "đường" and object.word == "dây" and object.nextWord1 == "điện" : object.conclusion = "B" + object.word == "dây" and object.nextWord2 == "thoại" : object.conclusion = "I" + object.word == "luật" and object.nextWord1 == "tố" and object.nextWord2 == "tụng" : object.conclusion = "B" + object.prevWord2 == "triệu" and object.word == "tiền" : object.conclusion = "B" + object.prevWord1 == "biết" and object.nextWord1 == "giờ" : object.conclusion = "B" + object.word == "riêng" and object.nextWord1 == "lẻ" : object.conclusion = "B" + object.word == "thể" and object.nextWord1 == "hiện" : object.conclusion = "B" + object.word == "thể" and object.nextWord2 == "nay" : object.conclusion = "I" + object.prevWord1 == "tay" and object.word == "cầm" : object.conclusion = "B" + object.prevWord2 == "cao" and object.word == "là" : object.conclusion = "B" + object.word == "học" and object.nextWord1 == "tập" : object.conclusion = "B" + object.prevWord2 == "sợ" and object.prevWord1 == "nhất" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "thứ" and object.word == "năm" : object.conclusion = "B" + object.prevWord2 == "hôm" : object.conclusion = "I" + object.word == "công" and object.nextWord1 == "tác" : object.conclusion = "B" + object.word == "vận" and object.nextWord1 == "chuyển" : object.conclusion = "B" + object.prevWord1 == "mẹ" and object.word == "già" : object.conclusion = "B" + object.prevWord1 == "mặt" and object.word == "đường" : object.conclusion = "B" + object.prevWord2 == "hỏng" : object.conclusion = "I" + object.prevWord2 == "nhựa" and object.prevWord1 == "mặt" and object.word == "đường" : object.conclusion = "I" + object.prevWord1 == "khi" : object.conclusion = "B" + object.prevWord1 == "đang" : object.conclusion = "B" + object.word == "lý" and object.nextWord1 == "do" : object.conclusion = "B" + object.word == "nguyễn" and object.nextTag1 == "I" : object.conclusion = "B" + object.prevWord1 == "sự" and object.word == "biến" : object.conclusion = "B" + object.word == "mặt" and object.nextWord1 == "bằng" : object.conclusion = "B" + object.word == "số" and object.nextWord1 == "lượng" : object.conclusion = "B" + object.prevWord1 == "quyền" and object.word == "hành" : object.conclusion = "B" + object.word == "tp" : object.conclusion = "B" + object.nextWord1 == "cáo" : object.conclusion = "B" + object.nextTag1 == "B" and object.nextTag2 == "I" : object.conclusion = "I" + object.word == "chiến" and object.nextWord1 == "đấu" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "hàng" : object.conclusion = "B" + object.word == "nước" and object.nextWord1 == "ngọt" : object.conclusion = "B" + object.prevWord1 == "một" and object.word == "số" and object.nextWord1 == "ít" : object.conclusion = "B" + object.prevWord2 == "đứng" and object.word == "cơ" : object.conclusion = "B" + object.word == "phương" and object.nextWord1 == "án" : object.conclusion = "B" + 
object.word == "chế" and object.nextWord1 == "biến" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "đầu" : object.conclusion = "B" + object.prevWord1 == "số" and object.word == "gia" : object.conclusion = "B" + object.word == "đang" and object.nextWord2 == "triển" : object.conclusion = "B" + object.prevWord1 == "con" and object.word == "thứ" : object.conclusion = "B" + object.word == "đầu" and object.nextWord1 == "tiên" : object.conclusion = "B" + object.prevWord1 == "thu" and object.word == "ngân" and object.nextWord1 == "sách" : object.conclusion = "B" + object.prevWord1 == "ông" and object.word == "hoàng" : object.conclusion = "B" + object.word == "giá" and object.nextWord1 == "trị" : object.conclusion = "B" + object.word == "giá" and object.nextTag1 == "I" : object.conclusion = "I" + object.prevWord2 == "chậm" and object.word == "là" : object.conclusion = "B" + object.word == "máy" and object.nextWord1 == "bay" : object.conclusion = "B" + object.prevWord1 == "vào" and object.word == "cầu" : object.conclusion = "B" + object.prevWord1 == "ba" and object.word == "bốn" : object.conclusion = "B" + object.word == "vị" and object.nextWord1 == "trí" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "vì" : object.conclusion = "B" + object.prevWord1 == "ai" and object.word == "bảo" : object.conclusion = "B" + object.word == "khí" and object.nextWord1 == "tượng" : object.conclusion = "B" + object.word == "kế" and object.nextWord1 == "hoạch" : object.conclusion = "B" + object.prevWord1 == "báo" and object.nextWord1 == "an" : object.conclusion = "B" + object.prevWord2 == "mỗi" and object.word == "một" : object.conclusion = "B" + object.prevWord1 == "sang" and object.word == "năm" : object.conclusion = "B" + object.prevWord1 == "chị" and object.word == "ta" : object.conclusion = "B" + object.prevWord1 == "từ" and object.word == "thực" : object.conclusion = "B" + object.prevWord2 == "tỉ" and object.word == "tiền" : object.conclusion = "B" + object.prevWord1 == "như" and object.word == "thế" and object.nextWord1 == "là" : object.conclusion = "B" + object.prevWord1 == "mới" and object.word == "phải" : object.conclusion = "B" + object.word == "tài" and object.nextWord1 == "xế" : object.conclusion = "B" + object.prevWord1 == "người" and object.word == "thương" : object.conclusion = "B" + object.prevWord1 == "người" and object.word == "thương" and object.nextWord1 == "''" : object.conclusion = "I" + object.nextWord1 == "..." 
: object.conclusion = "I" + object.prevTag1 == "I" and object.word == "trần" and object.nextTag1 == "I" : object.conclusion = "B" + object.prevWord1 == "từ" and object.word == "nguyên" : object.conclusion = "B" + object.prevWord1 == "tổng" and object.word == "thu" and object.nextWord1 == "nhập" : object.conclusion = "B" + object.prevWord1 == "con" and object.word == "nhà" : object.conclusion = "B" + object.prevWord1 == "giày" and object.word == "an" and object.nextWord1 == "giang" : object.conclusion = "B" + object.prevWord1 == "ra" and object.word == "công" : object.conclusion = "B" + object.word == "nghiệp" and object.nextWord1 == "vụ" : object.conclusion = "B" + object.prevWord1 == "một" and object.word == "đôi" : object.conclusion = "B" + object.prevWord1 == "tiếng" and object.word == "động" and object.nextWord1 == "cơ" : object.conclusion = "B" + object.prevWord1 == "bán" and object.word == "nước" : object.conclusion = "B" + object.prevWord2 == "tốt" and object.prevWord1 == "nhất" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "ông" and object.word == "từ" : object.conclusion = "B" + object.prevWord1 == "người" and object.word == "dân" and object.nextWord1 == "nghèo" : object.conclusion = "B" + object.prevWord1 == "cụm" and object.word == "cảng" and object.nextWord1 == "hàng" : object.conclusion = "B" + object.prevWord1 == "đường" and object.word == "biên" and object.nextWord1 == "ngang" : object.conclusion = "B" + object.word == "chi" and object.nextWord1 == "tiêu" : object.conclusion = "B" + object.word == "viên" and object.nextWord1 == "chức" : object.conclusion = "B" + object.prevWord1 == "hồ" and object.word == "thuỷ" : object.conclusion = "B" + object.word == "thủ" and object.nextWord1 == "thiêm" : object.conclusion = "B" + object.prevWord1 == "lại" and object.word == "người" : object.conclusion = "B" + object.prevWord2 == "cần" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "chậm" and object.word == "tiến" and object.nextWord1 == "độ" : object.conclusion = "B" + object.prevWord1 == "độ" and object.word == "phì" and object.nextWord1 == "nhiêu" : object.conclusion = "B" + object.word == "tiêu" and object.nextWord1 == "thụ" : object.conclusion = "B" + object.prevWord1 == "phủ" and object.word == "chủ" : object.conclusion = "B" + object.word == "hà" and object.nextWord1 == "nội" : object.conclusion = "B" + object.prevWord1 == "từ" and object.word == "căn" : object.conclusion = "B" + object.word == "mặt" and object.nextWord1 == "hàng" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "nhân" : object.conclusion = "B" + object.prevWord1 == "con" and object.word == "người" and object.nextWord1 == "ở" : object.conclusion = "B" + object.prevWord1 == "người" and object.nextWord1 == "tuổi" : object.conclusion = "B" + object.word == "xe" and object.nextWord1 == "khách" : object.conclusion = "B" + object.prevWord1 == "thấy" and object.word == "bà" : object.conclusion = "B" + object.prevWord1 == "tự" and object.word == "quyết" and object.nextWord1 == "định" : object.conclusion = "B" + object.prevWord1 == "cô" and object.word == "bác" and object.nextWord1 == "sĩ" : object.conclusion = "B" + object.prevWord1 == "xe" and object.word == "tải" and object.nextWord1 == "trọng" : object.conclusion = "B" + object.prevWord2 == "chỉ" and object.prevWord1 == "huy" and object.word == "sở" : object.conclusion = "B" + object.prevWord1 == "người" and object.word == "ngoài" : object.conclusion = "B" + object.prevWord1 == "có" and 
object.word == "tuổi" : object.conclusion = "B" + object.word == "nước" and object.nextWord1 == "ngoài" : object.conclusion = "B" + object.prevWord1 == "như" and object.nextWord1 == "lệ" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "bộ" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "bộ" and object.nextWord1 == "ngạc" : object.conclusion = "I" + object.prevWord1 == "sự" and object.word == "chủ" : object.conclusion = "B" + object.word == "công" and object.nextWord1 == "bố" : object.conclusion = "B" + object.prevWord1 == "nước" and object.word == "dùng" : object.conclusion = "B" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "trước" : object.conclusion = "B" + object.prevWord1 == "thuế" and object.nextWord1 == "bạ" : object.conclusion = "I" + object.prevWord2 == "''" and object.word == "trước" : object.conclusion = "I" + object.prevWord2 == "," : object.conclusion = "I" + object.word == "trước" and object.nextWord1 == "người" and object.nextWord2 == "brâu" : object.conclusion = "I" + object.prevWord1 == "ngày" and object.word == "trước" and object.nextWord1 == "của" : object.conclusion = "I" + object.word == "xe" and object.nextWord1 == "tải" : object.conclusion = "B" + object.prevWord1 == "chất" and object.word == "độc" and object.nextWord1 == "hại" : object.conclusion = "B" + object.word == "thuế" and object.nextWord1 == "thu" : object.conclusion = "B" + object.prevWord1 == "từ" and object.word == "chương" and object.nextWord1 == "trình" : object.conclusion = "B" + object.prevWord1 == "kim" and object.word == "đồng" and object.nextWord1 == "hồ" : object.conclusion = "B" + object.prevWord1 == "hội" and object.word == "kiến" and object.nextWord1 == "trúc" : object.conclusion = "B" + object.prevWord1 == "nước" and object.word == "giải" and object.nextWord1 == "khát" : object.conclusion = "B" + object.word == "cao" and object.nextWord1 == "nguyên" : object.conclusion = "B" + object.prevWord2 == "điều" and object.prevWord1 == "kiện" and object.word == "cần" : object.conclusion = "B" + object.prevWord1 == "như" and object.word == "không" : object.conclusion = "B" + object.nextWord2 == "" : object.conclusion = "I" + object.word == "đà" and object.nextTag1 == "I" and object.nextTag2 == "B" : object.conclusion = "B" + object.prevWord2 == "lá" and object.prevWord1 == "cờ" and object.word == "đỏ" : object.conclusion = "B" + object.prevWord2 == "cứ" and object.prevWord1 == "thế" and object.word == "mà" : object.conclusion = "B" + object.prevWord1 == "lúa" and object.nextWord1 == "xuân" : object.conclusion = "B" + object.prevWord1 == "số" and object.word == "nhân" : object.conclusion = "B" + object.word == "đoàn" and object.nextWord1 == "chủ" : object.conclusion = "B" + object.word == "hàng" and object.nextWord1 == "xóm" : object.conclusion = "B" + object.prevWord2 == "nặng" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "mang" : object.conclusion = "B" + object.prevWord1 == "cục" and object.word == "tác" and object.nextWord1 == "chiến" : object.conclusion = "B" + object.prevWord1 == "vùng" and object.nextWord1 == "biển" : object.conclusion = "B" + object.word == "mặt" and object.nextWord1 == "đường" : object.conclusion = "B" + object.prevWord1 == "đoàn" and object.word == "bộ" : object.conclusion = "B" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "ii" : object.conclusion = "B" + object.word == "thời" and object.nextWord1 == "gian" : object.conclusion 
= "B" + object.prevWord1 == "phải" and object.word == "cái" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "đến" and object.word == "nơi" : object.conclusion = "B" + object.prevWord1 == "từ" and object.word == "thông" : object.conclusion = "B" + object.word == "công" and object.nextWord1 == "nhân" : object.conclusion = "B" + object.word == "giá" and object.nextWord1 == "thành" : object.conclusion = "B" + object.prevWord1 == "số" and object.word == "học" : object.conclusion = "B" + object.prevWord1 == "nữ" and object.nextWord1 == "hùng" : object.conclusion = "B" + object.word == "thành" and object.nextWord1 == "phố" : object.conclusion = "B" + object.prevWord1 == "hình" : object.conclusion = "I" + object.word == "thủ" and object.nextWord1 == "tướng" : object.conclusion = "B" + object.word == "hành" and object.nextWord1 == "chính" and object.nextWord2 == "sự" : object.conclusion = "B" + object.word == "thế" and object.nextWord1 == "thì" : object.conclusion = "B" + object.prevWord1 == "đánh" and object.word == "bóng" : object.conclusion = "B" + object.prevWord1 == "đi" and object.word == "ở" : object.conclusion = "B" + object.word == "sức" and object.nextWord1 == "mạnh" : object.conclusion = "B" + object.prevWord1 == "tuổi" and object.word == "tôi" : object.conclusion = "B" + object.word == "mặt" and object.nextWord2 == "cực" : object.conclusion = "B" + object.word == "thư" and object.nextWord2 == "toà" : object.conclusion = "B" + object.prevWord1 == "một" and object.word == "thể" : object.conclusion = "B" + object.prevWord2 == "với" and object.prevWord1 == "con" and object.word == "cái" : object.conclusion = "B" + object.prevWord1 == "cờ" and object.word == "quý" : object.conclusion = "B" + object.word == "số" and object.nextWord1 == "phận" : object.conclusion = "B" + object.prevWord1 == "bên" and object.nextWord1 == "đơn" : object.conclusion = "B" + object.prevWord1 == "lái" and object.word == "xe" and object.nextWord1 == "ôm" : object.conclusion = "B" + object.word == "đạo" and object.nextWord1 == "đức" : object.conclusion = "B" + object.prevWord2 == "những" and object.prevWord1 == "người" and object.word == "yêu" : object.conclusion = "B" + object.word == "sinh" and object.nextWord1 == "viên" : object.conclusion = "B" + object.prevWord1 == "lên" and object.word == "cơn" and object.nextWord1 == "sốt" : object.conclusion = "B" + object.word == "từ" and object.nextWord2 == "-" : object.conclusion = "B" + object.word == "kiến" and object.nextWord1 == "thức" : object.conclusion = "B" + object.prevWord1 == "chính" and object.word == "sự" : object.conclusion = "B" + object.prevWord1 == "lại" and object.word == "giống" : object.conclusion = "B" + object.prevWord1 == "màu" and object.nextWord1 == "đen" : object.conclusion = "B" + object.prevWord2 == "đây" and object.prevWord1 == "nhất" and object.word == "là" : object.conclusion = "B" + object.prevWord2 == "khó" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "thưa" and object.nextWord1 == "," : object.conclusion = "B" + object.word == "thớt" : object.conclusion = "I" + object.word == "chủ" and object.nextWord1 == "lực" : object.conclusion = "B" + object.prevWord1 == "trăm" and object.nextWord1 == "đồng" : object.conclusion = "B" + object.prevWord1 == "nghỉ" and object.word == "dưỡng" and object.nextWord1 == "sức" : object.conclusion = "B" + object.prevWord1 == "đến" and object.nextWord1 == "ở" : object.conclusion = "B" + object.prevWord1 == "đèn" and object.word == "chiếu" and object.nextWord1 
== "sáng" : object.conclusion = "B" + object.prevWord2 == "," : object.conclusion = "I" + object.prevWord1 == "rồi" and object.word == "ra" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "điều" and object.nextWord1 == "chỉnh" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "việc" and object.nextWord1 == "gì" : object.conclusion = "B" + object.word == "cờ" and object.nextWord1 == "quý" and object.nextWord2 == "tị" : object.conclusion = "B" + object.prevWord1 == "gây" and object.word == "sự" : object.conclusion = "B" + object.word == "huyết" and object.nextWord1 == "mạch" : object.conclusion = "B" + object.prevWord1 == "nhà" and object.nextWord1 == "môn" : object.conclusion = "B" + object.nextWord2 == "," : object.conclusion = "I" + object.prevWord1 == "có" and object.word == "học" : object.conclusion = "B" + object.prevTag1 == "I" and object.word == "phó" : object.conclusion = "B" + object.prevWord2 == "con" and object.prevWord1 == "đường" and object.word == "dẫn" : object.conclusion = "B" + object.prevWord1 == "mở" and object.nextWord1 == "bay" : object.conclusion = "B" + object.word == "tình" and object.nextWord1 == "cờ" : object.conclusion = "B" + object.word == "sinh" and object.nextWord1 == "sống" : object.conclusion = "B" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "vào" : object.conclusion = "B" + object.prevWord2 == "''" : object.conclusion = "I" + object.nextWord1 == "đấy" : object.conclusion = "I" + object.prevWord1 == "đầu" and object.word == "vào" and object.nextWord1 == "năm" : object.conclusion = "I" + object.prevWord1 == "cùng" and object.word == "dân" : object.conclusion = "B" + object.prevWord1 == "ba" and object.word == "không" : object.conclusion = "B" + object.prevWord2 == "tháng" and object.word == "hai" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "anh" and object.word == "minh" : object.conclusion = "B" + object.prevWord1 == "không" and object.word == "vận" : object.conclusion = "B" + object.prevWord1 == "không" and object.word == "đâu" : object.conclusion = "B" + object.prevWord1 == "lúa" and object.word == "hè" : object.conclusion = "B" + object.prevWord1 == "cấp" and object.nextWord1 == "sạch" : object.conclusion = "B" + object.prevWord2 == "dựng" and object.prevWord1 == "chợ" and object.word == "mới" : object.conclusion = "B" + object.word == "học" and object.nextWord1 == "sinh" and object.nextWord2 == "miền" : object.conclusion = "B" + object.prevWord1 == "ra" and object.word == "hiệu" and object.nextWord1 == "ứng" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "anh" and object.word == "vũ" : object.conclusion = "B" + object.prevWord1 == "tiền" and object.word == "tiêu" : object.conclusion = "B" + object.word == "chiều" and object.nextWord1 == "qua" and object.nextWord2 == "cầu" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "i" : object.conclusion = "B" + object.prevWord2 == "lo" and object.prevWord1 == "nhất" and object.word == "là" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "chưa" and object.nextTag1 == "B" : object.conclusion = "B" + object.prevWord2 == "nói" and object.prevWord1 == "lên" and object.word == "tiếng" : object.conclusion = "B" + object.prevWord1 == "mặt" and object.word == "nước" and object.nextWord1 == "biển" : object.conclusion = "B" + object.prevWord1 == "hết" and object.word == "đời" : object.conclusion = "B" + object.prevWord1 == "mart" : object.conclusion = 
"B" + object.prevWord1 == "chạm" and object.nextWord1 == "đất" : object.conclusion = "B" + object.prevWord1 == "trong" and object.word == "sáng" and object.nextWord1 == "nay" : object.conclusion = "B" + object.prevWord1 == "dải" and object.word == "đồng" : object.conclusion = "B" + object.prevWord1 == "bất" and object.nextWord1 == "thường" : object.conclusion = "B" + object.prevWord1 == "chủ" and object.word == "công" and object.nextWord1 == "trình" : object.conclusion = "B" + object.prevWord1 == "lời" and object.nextWord1 == "thích" : object.conclusion = "B" + object.prevWord1 == "ngã" and object.word == "năm" : object.conclusion = "B" + object.word == "năm" and object.nextWord1 == "và" and object.nextWord2 == "thạnh" : object.conclusion = "I" + object.prevWord1 == "có" and object.word == "điều" and object.nextWord1 == "gì" : object.conclusion = "B" + object.word == "giải" and object.nextWord1 == "đáp" : object.conclusion = "B" + object.prevWord1 == "một" and object.nextWord1 == "mầu" : object.conclusion = "B" + object.prevWord1 == "đường" and object.word == "tàu" : object.conclusion = "B" + object.word == "cây" and object.nextWord1 == "cầu" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "hạn" and object.nextWord1 == "ngạch" : object.conclusion = "B" + object.word == "thuốc" and object.nextWord1 == "nam" : object.conclusion = "B" + object.prevWord1 == "nữ" and object.nextWord1 == "cướp" : object.conclusion = "B" + object.prevWord1 == "đến" and object.word == "điều" : object.conclusion = "B" + object.prevWord1 == "từng" and object.word == "lớp" : object.conclusion = "B" + object.prevWord1 == "tự" and object.word == "quản" and object.nextWord1 == "lý" : object.conclusion = "B" + object.prevWord1 == "nước" and object.word == "nặng" : object.conclusion = "B" + object.prevWord1 == "tại" and object.nextWord1 == "đình" : object.conclusion = "B" + object.prevWord1 == "nhiều" and object.word == "chuyện" : object.conclusion = "B" + object.prevWord1 == "ba" and object.word == "lá" : object.conclusion = "B" + object.word == "thể" and object.nextWord1 == "thao" : object.conclusion = "B" + object.prevWord1 == "đường" and object.nextWord1 == "đỏ" : object.conclusion = "B" + object.prevWord2 == "nhỏ" and object.prevWord1 == "nhất" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "ra" and object.word == "điều" : object.conclusion = "B" + object.prevWord1 == "cước" and object.word == "vận" : object.conclusion = "B" + object.prevWord2 == "thấp" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "mất" and object.word == "sức" and object.nextWord1 == "lao" : object.conclusion = "B" + object.prevWord1 == "báo" and object.word == "nói" : object.conclusion = "B" + object.prevWord1 == "mở" and object.word == "cửa" and object.nextWord1 == "hàng" : object.conclusion = "B" + object.prevWord1 == "tai" and object.word == "nghe" : object.conclusion = "B" + object.nextWord1 == "mắt" : object.conclusion = "I" + object.prevWord2 == "rõ" and object.prevWord1 == "nhất" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "điện" and object.word == "từ" : object.conclusion = "B" + object.prevWord1 == "hết" and object.word == "nước" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "rừng" and object.word == "núi" : object.conclusion = "B" + object.prevWord1 == "đường" and object.word == "biên" and object.nextWord1 == "dọc" : object.conclusion = "B" + object.word == "sở" and object.nextWord2 == "trí" : 
object.conclusion = "B" + object.prevWord2 == "vệ" and object.word == "sở" : object.conclusion = "I" + object.prevWord1 == "người" and object.nextWord1 == "quê" : object.conclusion = "B" + object.prevWord1 == "chợ" and object.nextWord1 == "xây" : object.conclusion = "B" + object.word == "xác" and object.nextWord1 == "định" : object.conclusion = "B" + object.prevWord1 == "năm" and object.word == "tháng" and object.nextWord1 == "nay" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "anh" and object.word == "sơn" : object.conclusion = "B" + object.word == "tay" and object.nextWord1 == "trái" : object.conclusion = "B" + object.prevWord1 == "do" and object.nextWord1 == "án" : object.conclusion = "B" + object.word == "ra" and object.nextWord1 == "đường" : object.conclusion = "B" + object.word == "mẹ" and object.nextWord1 == "con" : object.conclusion = "B" + object.prevWord1 == "nước" and object.word == "rút" and object.nextWord1 == "." : object.conclusion = "B" + object.prevWord1 == "học" and object.word == "chính" : object.conclusion = "B" + object.word == "mặt" and object.nextWord1 == "tiền" : object.conclusion = "B" + object.prevWord1 == "thành" and object.word == "sự" : object.conclusion = "B" + object.prevWord1 == "vào" and object.word == "đề" : object.conclusion = "B" + object.prevWord1 == "trồng" and object.word == "chuối" : object.conclusion = "B" + object.prevWord2 == "nhờ" and object.word == "mà" : object.conclusion = "B" + object.nextTag1 == "B" and object.nextTag2 == "I" : object.conclusion = "I" + object.prevWord1 == "hết" and object.word == "đất" : object.conclusion = "B" + object.prevWord1 == "cty" : object.conclusion = "B" + object.word == "công" and object.nextWord1 == "viên" : object.conclusion = "B" + object.prevWord1 == "nhà" and object.word == "hảo" : object.conclusion = "B" + object.prevWord2 == "của" and object.prevWord1 == "việc" and object.word == "làm" : object.conclusion = "B" + object.prevWord1 == "người" and object.nextWord1 == "" : object.conclusion = "B" + object.prevWord1 == "là" and object.word == "cùng" : object.conclusion = "B" + object.word == "cùng" and object.nextWord1 == "''" : object.conclusion = "I" + object.prevWord2 == "gọi" and object.word == "báo" : object.conclusion = "B" + object.word == "chi" and object.nextWord1 == "trả" : object.conclusion = "B" + object.word == "báo" and object.nextWord1 == "tử" : object.conclusion = "B" + object.prevWord1 == "làm" and object.nextWord1 == "dễ" : object.conclusion = "B" + object.word == "ngày" and object.nextWord1 == "khác" : object.conclusion = "B" + object.word == "phân" and object.nextTag1 == "B" and object.nextTag2 == "B" : object.conclusion = "B" + object.word == "năng" and object.nextWord1 == "động" : object.conclusion = "B" + object.prevWord1 == "làm" and object.word == "chứng" and object.nextWord1 == "minh" : object.conclusion = "B" + object.prevWord1 == "nước" and object.word == "trắng" : object.conclusion = "B" + object.prevWord2 == "đông" and object.word == "là" : object.conclusion = "B" + object.word == "bác" and object.nextWord1 == "hồ" : object.conclusion = "B" + object.prevWord1 == "thành" and object.nextWord1 == "ty" : object.conclusion = "B" + object.prevTag2 == "B" and object.prevTag1 == "I" and object.word == "vườn" : object.conclusion = "B" + object.prevWord1 == "đất" and object.word == "màu" and object.nextWord1 == "mỡ" : object.conclusion = "B" + object.word == "tròn" and object.nextWord1 == "nghĩa" and object.nextWord2 == "vụ" : object.conclusion = "B" + 
object.word == "nghìn" and object.nextTag1 == "B" and object.nextTag2 == "B" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "lí" and object.nextWord1 == "do" : object.conclusion = "B" + object.word == "thế" and object.nextWord1 == "giới" : object.conclusion = "B" + object.word == "hoà" and object.nextWord1 == "bình" and object.nextWord2 == "2004" : object.conclusion = "B" + object.prevWord1 == "tuổi" and object.word == "trẻ" and object.nextWord1 == "em" : object.conclusion = "B" + object.prevWord2 == "nuôi" and object.word == "nhỏ" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "anh" and object.word == "tài" : object.conclusion = "B" + object.word == "đối" and object.nextWord1 == "tượng" : object.conclusion = "B" + object.prevWord1 == "đào" and object.word == "sâu" : object.conclusion = "B" + object.word == "sâu" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "trăm" and object.word == "năm" and object.nextWord1 == "nay" : object.conclusion = "B" + object.prevWord2 == "hành" and object.prevWord1 == "thanh" and object.word == "kiểm" : object.conclusion = "B" + object.prevWord1 == "tái" and object.nextWord1 == "xuất" : object.conclusion = "B" + object.prevWord1 == "như" and object.nextWord1 == "tra" : object.conclusion = "B" + object.word == "phụ" and object.nextWord1 == "trợ" : object.conclusion = "B" + object.prevWord2 == "miền" and object.prevWord1 == "đông" and object.word == "nam" : object.conclusion = "B" + object.prevWord1 == "vietsovpetro" and object.nextWord1 == "quốc" : object.conclusion = "B" + object.word == "cùng" and object.nextWord1 == "cực" : object.conclusion = "B" + object.prevWord1 == "cầu" and object.word == "nối" and object.nextWord1 == "tiếp" : object.conclusion = "B" + object.prevWord2 == "thì" and object.prevWord1 == "hay" and object.word == "biết" : object.conclusion = "B" + object.prevWord1 == "phá" and object.word == "nước" and object.nextWord1 == "''" : object.conclusion = "B" + object.prevWord2 == "vực" and object.prevWord1 == "bắc" and object.word == "trung" : object.conclusion = "B" + object.prevWord1 == "ngành" and object.word == "hàng" and object.nextWord1 == "không" : object.conclusion = "B" + object.prevWord1 == "không" and object.word == "trung" and object.nextWord1 == "thực" : object.conclusion = "B" + object.word == "cầu" and object.nextWord1 == "khỉ" : object.conclusion = "B" + object.prevWord1 == "làm" and object.nextWord1 == "đề" : object.conclusion = "B" + object.prevWord2 == "thích" and object.prevWord1 == "nhất" and object.word == "là" : object.conclusion = "B" + object.word == "tối" and object.nextWord1 == "trời" : object.conclusion = "B" + object.prevWord1 == "không" and object.word == "phận" and object.nextWord1 == "sự" : object.conclusion = "B" + object.word == "viện" and object.nextWord1 == "phí" : object.conclusion = "B" + object.prevWord1 == "giảm" and object.word == "tốc" and object.nextWord1 == "độ" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "thứ" and object.word == "sáu" : object.conclusion = "B" + object.prevWord1 == "dạng" and object.word == "hình" : object.conclusion = "B" + object.prevWord1 == "chỉ" and object.word == "giới" and object.nextWord1 == "hạn" : object.conclusion = "B" + object.word == "trọng" and object.nextWord1 == "lượng" : object.conclusion = "B" + object.word == "tưởng" and object.nextWord1 == "tượng" : object.conclusion = "B" + object.nextWord1 == "tắp" : object.conclusion = "B" + object.prevWord1 == "mất" 
and object.nextWord1 == "nhớ" : object.conclusion = "B" + object.prevWord1 == "cỗ" and object.nextWord1 == "quan" : object.conclusion = "B" + object.prevWord1 == "đường" and object.word == "mật" : object.conclusion = "B" + object.prevWord1 == "việc" and object.nextWord1 == "ăn" : object.conclusion = "B" + object.word == "già" and object.nextWord2 == "," : object.conclusion = "B" + object.word == "khách" and object.nextWord1 == "sạn" : object.conclusion = "B" + object.word == "ngất" and object.nextWord1 == "ngưởng" : object.conclusion = "B" + object.prevWord1 == "người" and object.nextWord1 == "cầu" : object.conclusion = "B" + object.prevWord2 == "3" and object.word == "tuổi" : object.conclusion = "B" + object.word == "vậy" and object.nextWord2 == "cái" : object.conclusion = "B" + object.word == "mặt" and object.nextWord1 == "nạ" : object.conclusion = "B" + object.prevWord1 == "nào" and object.word == "hay" : object.conclusion = "B" + object.prevWord1 == "wave" : object.conclusion = "B" + object.word == "ý" and object.nextWord2 == "của" : object.conclusion = "B" + object.word == "ý" and object.nextWord1 == "mua" : object.conclusion = "I" + object.prevWord1 == "như" and object.nextWord1 == ":" : object.conclusion = "B" + object.prevWord2 == "ba" and object.prevWord1 == "ngày" and object.word == "nay" : object.conclusion = "B" + object.word == "mỹ" and object.nextWord1 == "sơn" : object.conclusion = "B" + object.prevWord1 == "khoán" and object.word == "sản" : object.conclusion = "B" + object.prevWord2 == "hai" and object.word == "một" : object.conclusion = "B" + object.word == "tâm" and object.nextWord1 == "sự" : object.conclusion = "B" + object.prevWord1 == "âu" and object.word == "là" : object.conclusion = "B" + object.word == "một" and object.nextWord1 == "số" : object.conclusion = "B" + object.prevWord1 == "công" and object.word == "lao" and object.nextWord1 == "động" : object.conclusion = "B" + object.prevWord2 == "mạnh" and object.prevWord1 == "ai" and object.word == "nấy" : object.conclusion = "B" + object.prevWord1 == "ông" and object.word == "mãnh" : object.conclusion = "B" + object.prevWord2 == "hai" and object.word == "nhỏ" : object.conclusion = "B" + object.prevWord1 == "người" and object.nextWord1 == "làng" : object.conclusion = "B" + object.prevWord2 == "năm" and object.prevWord1 == "năm" and object.word == "tuổi" : object.conclusion = "B" + object.word == "lê" and object.nextWord1 == "văn" : object.conclusion = "B" + object.word == "tiêu" and object.nextWord1 == "huỷ" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "hạng" and object.nextWord1 == "mục" : object.conclusion = "B" + object.word == "giới" and object.nextWord1 == "thiệu" : object.conclusion = "B" + object.prevWord1 == "ông" and object.word == "gia" : object.conclusion = "B" + object.prevWord1 == "môn" and object.word == "sinh" and object.nextWord1 == "lớp" : object.conclusion = "B" + object.prevWord2 == "một" and object.prevWord1 == "ngày" and object.word == "một" : object.conclusion = "B" + object.prevTag2 == "B" and object.prevTag1 == "I" and object.word == "phải" : object.conclusion = "B" + object.prevWord2 == "bước" and object.prevWord1 == "vào" and object.word == "cuộc" : object.conclusion = "B" + object.prevWord1 == "có" and object.nextWord1 == "khoản" : object.conclusion = "B" + object.word == "nghĩa" and object.nextWord1 == "trang" : object.conclusion = "B" + object.prevWord1 == "y" and object.word == "dược" and object.nextWord1 == "," : object.conclusion = "B" + object.prevWord1 == 
"nam" and object.word == "trung" and object.nextWord1 == "quốc" : object.conclusion = "B" + object.prevWord1 == "tình" and object.word == "thương" and object.nextWord1 == "yêu" : object.conclusion = "B" + object.prevWord1 == "bóng" and object.word == "bay" : object.conclusion = "B" + object.prevWord1 == "trăm" and object.word == "ngàn" and object.nextWord1 == "." : object.conclusion = "B" + object.prevWord1 == "đá" and object.word == "vàng" : object.conclusion = "B" + object.word == "chủ" and object.nextWord1 == "yếu" : object.conclusion = "B" + object.word == "phân" and object.nextWord1 == "phối" and object.nextWord2 == "duy" : object.conclusion = "B" + object.prevWord1 == "lên" and object.word == "khuôn" : object.conclusion = "B" + object.prevWord1 == "của" and object.word == "độc" : object.conclusion = "B" + object.prevWord1 == "có" and object.word == "ăn" : object.conclusion = "B" + object.prevWord1 == "chứa" and object.word == "chất" : object.conclusion = "B" + object.word == "đỏ" and object.nextWord1 == "rực" : object.conclusion = "B" + object.prevWord1 == "chó" and object.word == "chết" : object.conclusion = "B" + object.prevWord1 == "ba" and object.word == "chỉ" : object.conclusion = "B" + object.word == "thân" and object.nextWord1 == "quen" : object.conclusion = "B" + object.prevWord1 == "tăng" and object.nextWord1 == "độ" : object.conclusion = "B" + object.prevWord1 == "toàn" and object.word == "lực" : object.conclusion = "B" + object.word == "công" and object.nextWord1 == "sở" : object.conclusion = "B" + object.prevWord1 == "tình" and object.word == "yêu" and object.nextWord1 == "thương" : object.conclusion = "B" + object.prevWord1 == "con" and object.word == "khỉ" : object.conclusion = "B" + object.prevWord1 == "lớn" and object.word == "lao" and object.nextWord1 == "động" : object.conclusion = "B" + object.prevWord1 == "sát" and object.word == "thực" : object.conclusion = "B" + object.prevWord1 == "nhiễm" and object.word == "từ" : object.conclusion = "B" + object.prevWord1 == "phụ" and object.word == "từ" : object.conclusion = "B" + object.prevWord1 == "xuống" and object.nextWord1 == "rồi" : object.conclusion = "B" + object.prevWord1 == "vô" and object.nextWord1 == "tâm" : object.conclusion = "B" + object.prevTag2 == "" and object.prevTag1 == "B" and object.word == "bảy" : object.conclusion = "B" + object.prevWord1 == "tổ" and object.nextWord1 == "tác" : object.conclusion = "B" + object.prevWord1 == "đi" and object.word == "tu" and object.nextWord1 == "nghiệp" : object.conclusion = "B" + object.prevWord1 == "tiếng" and object.word == "là" : object.conclusion = "B" + object.prevWord1 == "mỹ" and object.word == "học" : object.conclusion = "B" + object.prevWord2 == "mấy" and object.prevWord1 == "ngày" and object.word == "nay" : object.conclusion = "B" + object.prevWord2 == "không" and object.prevWord1 == "ai" and object.word == "ngờ" : object.conclusion = "B" + object.word == "mà" and object.nextWord1 == "còn" : object.conclusion = "B" + object.prevWord1 == "lại" and object.word == "quả" : object.conclusion = "B" + object.word == "khi" and object.nextWord1 == "không" : object.conclusion = "B" + object.prevWord1 == "đèn" and object.word == "vàng" : object.conclusion = "B" + object.word == "sinh" and object.nextWord1 == "nhật" : object.conclusion = "B" + object.word == "điện" and object.nextWord1 == "thoại" : object.conclusion = "B" + object.prevWord1 == "nguyên" and object.word == "đại" : object.conclusion = "B" + object.prevWord1 == "trắng" and object.word == "trong" : 
object.conclusion = "B" + object.prevWord1 == "trực" and object.word == "chỉ" : object.conclusion = "B" + object.word == "đường" and object.nextWord1 == "băng" : object.conclusion = "B" + object.prevWord1 == "con" and object.word == "một" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "anh" and object.word == "hùng" : object.conclusion = "B" + object.prevWord1 == "vn" : object.conclusion = "B" + object.word == "thời" and object.nextWord1 == "vụ" : object.conclusion = "B" + object.word == "phương" and object.nextWord1 == "thức" : object.conclusion = "B" + object.prevWord1 == "tới" and object.word == "số" : object.conclusion = "B" + object.tag == "B" : object.conclusion = "B" + object.prevWord1 == "tân" : object.conclusion = "I" + object.prevTag1 == "I" : object.conclusion = "B" + object.prevWord2 == "hoà" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "," : object.conclusion = "B" + object.prevWord2 == "ông" : object.conclusion = "B" + object.word == "quan" and object.nextTag1 == "B" : object.conclusion = "B" + object.word == "cục" and object.nextTag1 == "I" and object.nextTag2 == "B" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "(" : object.conclusion = "B" + object.prevWord1 == "cơ" and object.word == "sở" : object.conclusion = "I" + object.prevWord1 == "châu" and object.word == "á" : object.conclusion = "I" + object.prevWord1 == "điều" and object.word == "kiện" : object.conclusion = "I" + object.prevWord1 == "giá" and object.word == "trị" : object.conclusion = "I" + object.prevWord1 == "châu" and object.word == "âu" : object.conclusion = "I" + object.prevWord1 == "sea" : object.conclusion = "I" + object.prevWord1 == "bác" and object.word == "hồ" : object.conclusion = "I" + object.prevWord1 == "phúc" and object.word == "huy" : object.conclusion = "I" + object.prevWord1 == "phố" and object.word == "đông" : object.conclusion = "I" + object.prevWord1 == "tổng" and object.word == "giám" : object.conclusion = "I" + object.prevWord1 == "chiến" and object.word == "tranh" : object.conclusion = "I" + object.prevWord1 == "võ" and object.word == "hương" : object.conclusion = "I" + object.prevWord1 == "thể" and object.word == "hiện" : object.conclusion = "I" + object.prevWord1 == "thể" and object.nextWord1 == "nay" : object.conclusion = "B" + object.prevWord1 == "dân" and object.word == "tộc" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "thể" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "hoài" and object.nextWord1 == "" : object.conclusion = "I" + object.prevWord1 == "hoài" and object.word == "." : object.conclusion = "B" + object.prevWord1 == "vịnh" and object.word == "mốc" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "rem" : object.conclusion = "I" + object.prevWord1 == "lê" : object.conclusion = "I" + object.prevTag1 == "I" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "," and object.nextTag1 == "B" : object.conclusion = "B" + object.prevWord2 == "kéo" : object.conclusion = "B" + object.word == "bảo" : object.conclusion = "B" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "." 
: object.conclusion = "B" + object.nextTag2 == "I" : object.conclusion = "B" + object.word == "máy" and object.nextTag1 == "I" and object.nextTag2 == "B" : object.conclusion = "B" + object.word == "và" and object.nextTag1 == "B" : object.conclusion = "B" + object.prevWord1 == "nguyễn" : object.conclusion = "I" + object.nextTag2 == "I" : object.conclusion = "B" + object.prevTag2 == "I" and object.prevTag1 == "I" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "-" : object.conclusion = "B" + object.prevWord2 == "chúa" : object.conclusion = "B" + object.prevWord2 == "triều" : object.conclusion = "B" + object.prevWord2 == "''" and object.prevWord1 == "nguyễn" and object.word == "''" : object.conclusion = "B" + object.word == "(" and object.nextTag1 == "B" and object.nextTag2 == "B" : object.conclusion = "B" + object.prevWord2 == "nhà" : object.conclusion = "B" + object.prevWord1 == "chợ" and object.word == "rẫy" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tịch" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "sử" and object.word == "dụng" : object.conclusion = "I" + object.prevWord1 == "lan" and object.word == "anh" : object.conclusion = "I" + object.prevWord1 == "nhật" and object.word == "linh" : object.conclusion = "I" + object.prevWord1 == "chợ" and object.word == "lớn" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tác" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "trường" and object.word == "sơn" : object.conclusion = "I" + object.prevWord1 == "đà" and object.word == "trang" : object.conclusion = "I" + object.prevWord1 == "hồng" and object.word == "quỳnh" : object.conclusion = "I" + object.prevWord1 == "đăng" and object.word == "nam" : object.conclusion = "I" + object.prevWord1 == "ơ" and object.word == "đu" : object.conclusion = "I" + object.word == "nguyên" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.word == "trung" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "miền" : object.conclusion = "B" + object.prevWord1 == "võ" and object.nextWord1 == "quỳnh" : object.conclusion = "I" + object.prevTag2 == "" and object.prevTag1 == "B" and object.word == "hưng" : object.conclusion = "I" + object.word == "hưng" and object.nextWord1 == "''" and object.nextWord2 == "cụt" : object.conclusion = "B" + object.prevWord2 == "" and object.prevWord1 == "ông" and object.word == "hưng" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "dũ" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "thức" : object.conclusion = "I" + object.prevWord1 == "ngọc" and object.word == "ẩn" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "phận" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "thăng" : object.conclusion = "B" + object.prevTag1 == "I" and object.word == "cư" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "biên" and object.word == "giới" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "viên" : object.conclusion = "I" + object.prevWord1 == "quốc" and object.word == "thanh" : object.conclusion = "I" + object.prevWord2 == "thuộc" and object.word == "bàn" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "dương" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "lãi" : object.conclusion = "B" + 
object.prevWord1 == "đặng" : object.conclusion = "I" + object.prevTag1 == "I" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "," and object.nextTag1 == "B" : object.conclusion = "B" + object.prevTag1 == "I" and object.word == "trạng" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "trưởng" and object.word == "phòng" : object.conclusion = "I" + object.word == "vũ" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.prevWord2 == "đồng" and object.prevWord1 == "tháp" and object.word == "mười" : object.conclusion = "I" + object.prevWord1 == "xe" and object.word == "bò" : object.conclusion = "I" + object.word == "hùng" and object.nextTag1 == "" : object.conclusion = "I" + object.word == "toàn" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.prevWord1 == "chánh" and object.word == "nghĩa" : object.conclusion = "I" + object.prevWord1 == "cuộc" and object.word == "sống" : object.conclusion = "I" + object.prevWord2 == "màu" and object.prevWord1 == "da" and object.word == "cam" : object.conclusion = "I" + object.word == "trường" and object.nextWord1 == "" and object.nextWord2 == "" : object.conclusion = "I" + object.prevWord1 == "/ch" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "hoá" and object.nextTag1 == "B" : object.conclusion = "I" + object.word == "hoá" and object.nextWord1 == "thương" : object.conclusion = "B" + object.nextWord2 == "hoạt" : object.conclusion = "B" + object.prevWord1 == "trần" and object.nextWord1 == "nghĩa" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "nhân" : object.conclusion = "I" + object.prevWord1 == "chứ" and object.word == "chúng" and object.nextWord1 == "lầu" : object.conclusion = "I" + object.prevWord2 == "để" and object.prevWord1 == "trở" and object.word == "thành" : object.conclusion = "I" + object.prevWord1 == "chi" and object.word == "phí" : object.conclusion = "I" + object.prevWord1 == "minh" and object.word == "luận" : object.conclusion = "I" + object.prevWord2 == "lê" and object.word == "đủ" : object.conclusion = "I" + object.prevWord1 == "bến" and object.word == "nghé" : object.conclusion = "I" + object.prevWord1 == "a" and object.word == "gia" : object.conclusion = "I" + object.prevWord1 == "duy" and object.nextWord1 == "" : object.conclusion = "I" + object.prevWord1 == "cẩm" and object.word == "hà" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "long" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "bến" and object.word == "thành" : object.conclusion = "I" + object.prevWord1 == "năm" and object.word == "cam" : object.conclusion = "I" + object.prevWord1 == "nhà" and object.word == "lớn" : object.conclusion = "I" + object.prevWord2 == "căn" : object.conclusion = "B" + object.word == "lớn" and object.nextWord2 == "," : object.conclusion = "B" + object.prevWord2 == "ba" and object.prevWord1 == "tháng" and object.word == "hai" : object.conclusion = "I" + object.prevWord1 == "thực" and object.word == "hiện" : object.conclusion = "I" + object.word == "giang" and object.nextWord2 == "" : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.word == ")" : object.conclusion = "B" + object.prevWord1 == "mỹ" and object.word == "kể" and object.nextWord1 == "," : object.conclusion = "B" + object.prevWord1 == "độc" and object.word == "màu" : object.conclusion = 
"I" + object.prevWord1 == "trình" and object.word == "độ" : object.conclusion = "I" + object.word == "văn" and object.nextWord2 == "" : object.conclusion = "I" + object.word == "văn" and object.nextWord1 == "." : object.conclusion = "B" + object.nextTag1 == "I" and object.nextTag2 == "" : object.conclusion = "B" + object.prevTag1 == "I" and object.word == "cáo" and object.nextTag1 == "B" : object.conclusion = "I" + object.word == "dũng" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "thầy" and object.word == "cai" : object.conclusion = "I" + object.prevWord1 == "điện" and object.word == "thoại" : object.conclusion = "I" + object.prevWord1 == "dây" and object.word == "điện" : object.conclusion = "I" + object.word == "điện" and object.nextWord1 == "thoại" : object.conclusion = "B" + object.prevWord1 == "phú" and object.word == "trung" : object.conclusion = "I" + object.word == "thành" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "yến" and object.word == "trinh" : object.conclusion = "I" + object.prevWord2 == "tôn" and object.prevWord1 == "thất" and object.word == "bách" : object.conclusion = "I" + object.prevWord2 == "bộ" and object.prevWord1 == "luật" and object.word == "tố" : object.conclusion = "I" + object.prevWord2 == "được" and object.prevWord1 == "việc" and object.word == "làm" : object.conclusion = "I" + object.prevWord2 == "xoá" and object.prevWord1 == "mù" and object.word == "chữ" : object.conclusion = "I" + object.prevWord1 == "châu" and object.word == "phi" : object.conclusion = "I" + object.prevWord1 == "o" and object.word == "c" : object.conclusion = "I" + object.prevWord1 == "học" and object.word == "tập" : object.conclusion = "I" + object.prevWord1 == "riêng" and object.word == "lẻ" : object.conclusion = "I" + object.prevWord1 == "hôm" and object.word == "qua" : object.conclusion = "I" + object.prevWord1 == "bao" and object.word == "giờ" : object.conclusion = "I" + object.prevWord1 == "á" and object.word == "châu" : object.conclusion = "I" + object.prevWord1 == "nghĩa" and object.word == "vụ" : object.conclusion = "I" + object.prevWord1 == "thất" and object.word == "tùng" : object.conclusion = "I" + object.prevWord1 == "trung" and object.word == "bộ" : object.conclusion = "I" + object.prevWord1 == "nghĩa" and object.word == "việt" and object.nextWord1 == "nam" : object.conclusion = "I" + object.prevWord1 == "tộc" and object.word == "thiểu" and object.nextWord1 == "số" : object.conclusion = "I" + object.prevWord1 == "năm" and object.word == "minh" : object.conclusion = "I" + object.prevWord1 == "ông" and object.word == "kích" : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.word == "thuận" : object.conclusion = "I" + object.prevWord1 == "đức" and object.word == "bình" : object.conclusion = "I" + object.prevWord1 == "tuấn" and object.word == "phùng" : object.conclusion = "I" + object.prevWord1 == "vận" and object.word == "chuyển" : object.conclusion = "I" + object.prevWord1 == "hai" and object.word == "cà" : object.conclusion = "I" + object.word == "hải" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "nước" and object.word == "ngọt" : object.conclusion = "I" + object.prevWord1 == "hoà" and object.word == "xã" and object.nextWord1 == "hội" : object.conclusion = "I" + object.prevWord1 == "mặt" and object.word == "bằng" : object.conclusion = "I" + object.prevWord1 == "quang" and object.word == "thiện" : object.conclusion = "I" + object.prevWord1 == "điện" and object.word == "ngọc" : 
object.conclusion = "I" + object.prevWord1 == "lý" and object.word == "do" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "việc" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "xanh" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "lệ" : object.conclusion = "I" + object.prevWord1 == "chiến" and object.word == "đấu" : object.conclusion = "I" + object.prevWord1 == "số" and object.word == "lượng" : object.conclusion = "I" + object.prevWord1 == "tụng" and object.nextWord1 == "sự" : object.conclusion = "I" + object.word == "son" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.word == "sơn" : object.conclusion = "I" + object.prevWord1 == "hoà" and object.word == "tân" : object.conclusion = "I" + object.prevWord2 == "quyền" and object.word == "pháp" : object.conclusion = "I" + object.prevWord1 == "trà" and object.nextWord1 == "," : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "xanh" : object.conclusion = "B" + object.prevWord2 == "cầu" and object.word == "tẻ" : object.conclusion = "I" + object.prevWord1 == "a" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "a" and object.word == ")" : object.conclusion = "B" + object.prevWord1 == "phương" and object.word == "án" : object.conclusion = "I" + object.word == "nguyên" and object.nextWord1 == "-" : object.conclusion = "I" + object.prevWord1 == "trọng" and object.word == "phú" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "kính" : object.conclusion = "I" + object.prevWord1 == "chế" and object.word == "biến" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "binh" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "một" and object.prevWord1 == "số" and object.word == "ít" : object.conclusion = "I" + object.prevWord1 == "cơ" and object.word == "quan" : object.conclusion = "I" + object.prevWord1 == "ông" and object.word == "buông" : object.conclusion = "I" + object.prevWord1 == "thất" and object.word == "thuyết" : object.conclusion = "I" + object.prevWord1 == "trường" and object.word == "giang" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tiên" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "ngân" and object.word == "sách" : object.conclusion = "I" + object.word == "cộng" and object.nextWord1 == "sản" and object.nextWord2 == "việt" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "chức" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "thành" and object.word == "long" : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.word == "đình" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "tình" : object.conclusion = "I" + object.prevWord1 == "đức" and object.nextWord1 == "," : object.conclusion = "I" + object.word == ")" : object.conclusion = "B" + object.prevTag2 == "I" and object.prevTag1 == "I" : object.conclusion = "B" + object.prevWord1 == "hồ" and object.nextWord1 == "-" : object.conclusion = "I" + object.prevWord1 == "lawrence" and object.word == "s.ting" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "mart" : object.conclusion = "I" + object.prevWord1 == "gia" and object.word == "cầm" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "sác" : object.conclusion = "I" + object.prevWord1 == "chủ" and object.word == 
"nhiệm" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "ty" : object.conclusion = "I" + object.prevWord1 == "bà" and object.word == "điểm" : object.conclusion = "I" + object.prevWord2 == "như" and object.prevWord1 == "thế" and object.word == "này" : object.conclusion = "I" + object.nextWord2 == "một" : object.conclusion = "B" + object.prevWord1 == "thế" and object.nextWord1 == "''" : object.conclusion = "B" + object.prevTag1 == "I" and object.word == "hoạch" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "vị" and object.word == "trí" : object.conclusion = "I" + object.prevWord1 == "máy" and object.word == "bay" : object.conclusion = "I" + object.prevWord1 == "đồng" and object.word == "phú" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "lực" and object.nextTag1 == "B" : object.conclusion = "I" + object.word == "lực" and object.nextWord1 == "và" : object.conclusion = "B" + object.prevWord1 == "dương" and object.word == "vương" : object.conclusion = "I" + object.word == "kheng" : object.conclusion = "I" + object.prevWord1 == "bến" and object.nextWord1 == "đồn" : object.conclusion = "I" + object.prevWord1 == "giá" and object.word == "thành" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "ren" : object.conclusion = "I" + object.word == "thống" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "lai" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "nghiệp" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "viết" and object.word == "nghệ" and object.nextWord1 == "tĩnh" : object.conclusion = "I" + object.prevWord1 == "xe" and object.word == "jeep" : object.conclusion = "I" + object.prevWord1 == "lam" and object.word == "điền" : object.conclusion = "I" + object.prevWord1 == "pháp" and object.word == "vân" : object.conclusion = "I" + object.prevWord1 == "đầm" and object.word == "sen" : object.conclusion = "I" + object.prevWord1 == "yhán" : object.conclusion = "I" + object.prevWord1 == "đức" and object.word == "vịnh" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tế" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "sĩ" : object.conclusion = "I" + object.prevWord2 == "bà" and object.prevWord1 == "huyện" and object.word == "thanh" : object.conclusion = "I" + object.prevWord1 == "võ" and object.nextWord1 == "cầu" : object.conclusion = "I" + object.prevWord2 == "võ" and object.word == "cầu" : object.conclusion = "I" + object.prevWord1 == "mặt" and object.word == "đất" : object.conclusion = "I" + object.prevWord1 == "cả" and object.word == "cấm" : object.conclusion = "I" + object.prevWord1 == "hai" and object.word == "trâm" : object.conclusion = "I" + object.prevWord2 == "u" and object.word == "hạ" : object.conclusion = "I" + object.prevWord1 == "quang" and object.word == "khải" : object.conclusion = "I" + object.prevWord1 == "nghiệp" and object.word == "vụ" and object.nextWord1 == "hành" : object.conclusion = "I" + object.prevWord1 == "tổng" and object.word == "cục" and object.nextWord1 == "trưởng" : object.conclusion = "I" + object.prevWord2 == "tổng" and object.prevWord1 == "thu" and object.word == "nhập" : object.conclusion = "I" + object.prevWord1 == "tượng" and object.nextWord1 == "văn" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "trợ" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 
== "do" and object.word == "lộ" : object.conclusion = "I" + object.prevWord1 == "hoàng" and object.word == "trí" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "anh" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord2 == "despicable" and object.prevWord1 == "me" and object.word == "2" : object.conclusion = "I" + object.word == "panda" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "tiếng" and object.prevWord1 == "động" and object.word == "cơ" : object.conclusion = "I" + object.word == "sông" and object.nextWord1 == "ngân" : object.conclusion = "I" + object.prevWord2 == "ty" and object.word == "đà" : object.conclusion = "I" + object.prevWord1 == "sông" and object.word == "ngân" : object.conclusion = "I" + object.prevWord1 == "minh" and object.word == "toán" : object.conclusion = "I" + object.prevWord1 == "học" and object.word == "sinh" : object.conclusion = "I" + object.prevWord1 == "mạnh" and object.word == "tuấn" : object.conclusion = "I" + object.prevWord2 == "người" and object.prevWord1 == "dân" and object.word == "nghèo" : object.conclusion = "I" + object.prevWord2 == "" and object.prevWord1 == "quốc" and object.word == "việt" : object.conclusion = "I" + object.prevWord1 == "kim" and object.word == "em" : object.conclusion = "I" + object.prevWord1 == "tài" and object.word == "xế" : object.conclusion = "I" + object.prevWord2 == "trần" and object.prevWord1 == "thế" and object.word == "ngọc" : object.conclusion = "I" + object.prevWord1 == "đầu" and object.word == "mối" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "động" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "cốc" and object.nextWord1 == "," : object.conclusion = "B" + object.prevWord1 == "thuỷ" : object.conclusion = "B" + object.word == "động" and object.nextWord2 == "," : object.conclusion = "B" + object.prevWord1 == "tập" and object.word == "động" : object.conclusion = "B" + object.prevWord1 == "đài" and object.word == "bắc" : object.conclusion = "I" + object.prevWord1 == "ô" and object.word == "loan" : object.conclusion = "I" + object.prevWord2 == "cầu" and object.prevWord1 == "ông" and object.word == "lãnh" : object.conclusion = "I" + object.prevWord2 == "báo" and object.prevWord1 == "công" and object.word == "an" : object.conclusion = "I" + object.prevWord1 == "nam" and object.word == "bộ" : object.conclusion = "I" + object.prevWord1 == "phì" and object.word == "nhiêu" : object.conclusion = "I" + object.word == "cường" and object.nextWord2 == "" : object.conclusion = "I" + object.prevWord1 == "thái" and object.word == "huyền" : object.conclusion = "I" + object.prevWord1 == "bến" and object.word == "hải" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.nextWord1 == "vọng" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "lộc" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "chức" and object.nextWord1 == "nguyên" : object.conclusion = "I" + object.prevWord2 == "cộng" and object.prevWord1 == "sản" and object.word == "việt" : object.conclusion = "I" + object.prevWord2 == "cầu" and object.word == "thiêm" : object.conclusion = "I" + object.word == "dảnh" : object.conclusion = "I" + object.prevWord1 == "chi" and object.word == "tiêu" : object.conclusion = "I" + object.prevWord1 == "kenneth" : object.conclusion = "I" + object.prevWord2 == "an" and object.prevWord1 == "phú" and object.word == "đông" : object.conclusion = "I" + 
object.prevWord2 == "thờ" and object.prevWord1 == "đức" and object.word == "bà" : object.conclusion = "I" + object.prevWord1 == "tiêu" and object.word == "thụ" : object.conclusion = "I" + object.prevWord1 == "chi" and object.word == "mai" : object.conclusion = "I" + object.prevWord2 == "cụm" and object.prevWord1 == "cảng" and object.word == "hàng" : object.conclusion = "I" + object.word == "thạnh" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "như" and object.word == "vọng" : object.conclusion = "I" + object.prevWord1 == "tiến" and object.word == "độ" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "sĩ" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "an." : object.conclusion = "I" + object.prevWord2 == "," and object.prevWord1 == "nào" and object.word == "là" : object.conclusion = "I" + object.prevWord1 == "bà" and object.word == "huyện" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "đức" : object.conclusion = "I" + object.prevWord1 == "ông" and object.word == "địa" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "đông" : object.conclusion = "I" + object.prevWord1 == "a" and object.nextWord1 == "(" : object.conclusion = "I" + object.prevWord1 == "y" and object.word == "chu" : object.conclusion = "I" + object.prevWord2 == "nên" and object.prevWord1 == "người" and object.word == "dân" : object.conclusion = "I" + object.prevWord1 == "mặt" and object.word == "hàng" : object.conclusion = "I" + object.prevWord1 == "bến" and object.word == "chương" : object.conclusion = "I" + object.prevWord2 == "bồ" and object.word == "đạt" : object.conclusion = "I" + object.prevWord1 == "đức" and object.word == "lập" and object.nextWord1 == "thượng" : object.conclusion = "I" + object.prevWord1 == "hà" and object.word == "đồng" : object.conclusion = "I" + object.prevWord2 == "máy" and object.prevWord1 == "điện" and object.word == "đàm" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "lũy" : object.conclusion = "I" + object.prevWord1 == "quyết" and object.word == "định" : object.conclusion = "I" + object.prevWord1 == "trà" and object.word == "my" : object.conclusion = "I" + object.word == "biến" and object.nextWord1 == "đổi" and object.nextWord2 == "gen" : object.conclusion = "I" + object.prevWord1 == "cống" and object.word == "quỳnh" : object.conclusion = "I" + object.prevWord1 == "có" and object.word == "lẽ" : object.conclusion = "I" + object.prevWord1 == "bà" and object.word == "hom" : object.conclusion = "I" + object.prevWord2 == "như" and object.prevWord1 == "thường" and object.word == "lệ" : object.conclusion = "I" + object.prevWord1 == "nước" and object.word == "ngoài" : object.conclusion = "I" + object.prevWord1 == "đoàn" and object.word == "đức" : object.conclusion = "I" + object.prevWord2 == "cao" and object.word == "quang" : object.conclusion = "I" + object.prevWord2 == "đức" and object.prevWord1 == "lập" and object.word == "thượng" : object.conclusion = "I" + object.prevWord1 == "hưng" and object.word == "thuận" : object.conclusion = "I" + object.prevWord2 == "biến" and object.prevWord1 == "đổi" and object.word == "gen" : object.conclusion = "I" + object.prevWord2 == "hồ" and object.prevWord1 == "thuỷ" and object.word == "điện" : object.conclusion = "I" + object.prevWord1 == "tải" and object.word == "trọng" : object.conclusion = "I" + object.prevWord2 == "người" and object.prevWord1 == "lớn" and object.word == "tuổi" : object.conclusion = "I" + 
object.prevWord2 == "" and object.prevWord1 == "nghe" and object.word == "nói" : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.nextWord1 == "(" : object.conclusion = "I" + object.prevWord1 == "lái" and object.word == "thiêu" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "trình" : object.conclusion = "I" + object.prevWord1 == "hai" and object.word == "hoàng" : object.conclusion = "I" + object.prevWord1 == "hồng" and object.nextWord1 == "(" : object.conclusion = "I" + object.word == "qua" and object.nextWord2 == "lại" : object.conclusion = "I" + object.prevWord1 == "đổ" : object.conclusion = "B" + object.prevWord1 == "trường" and object.word == "yên" : object.conclusion = "I" + object.prevWord1 == "qua" and object.nextWord1 == "lại" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "đổ" : object.conclusion = "B" + object.prevWord1 == "uyên" and object.word == "ly" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "bố" : object.conclusion = "I" + object.prevWord1 == "bộ" and object.word == "trưởng" : object.conclusion = "I" + object.prevWord1 == "đoàn" and object.word == "thượng" : object.conclusion = "I" + object.prevWord1 == "thế" and object.word == "hùng" : object.conclusion = "I" + object.prevWord1 == "xe" and object.word == "tải" : object.conclusion = "I" + object.prevWord1 == "xe" and object.word == "khách" : object.conclusion = "I" + object.prevWord2 == "dì" and object.word == "nở" : object.conclusion = "I" + object.word == "m'ga" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "đin" : object.conclusion = "I" + object.prevWord2 == "lãnh" and object.word == "thăng" : object.conclusion = "I" + object.prevWord1 == "cửa" and object.word == "cạn" : object.conclusion = "I" + object.prevWord1 == "thế" and object.word == "nhã" and object.nextWord1 == "" : object.conclusion = "I" + object.prevWord2 == "ông" and object.word == "đuai" : object.conclusion = "I" + object.prevWord1 == "đồng" and object.word == "khánh" : object.conclusion = "I" + object.prevWord2 == "bé" and object.word == "duyên" : object.conclusion = "I" + object.word == "nguyễn" and object.nextTag1 == "" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "tâm" and object.nextTag1 == "" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "hiếu" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "bột" : object.conclusion = "I" + object.prevWord2 == "lúa" and object.prevWord1 == "đông" and object.word == "xuân" : object.conclusion = "I" + object.prevWord1 == "duy" and object.nextWord1 == "-" : object.conclusion = "I" + object.prevWord2 == "tịch" and object.prevWord1 == "đoàn" and object.word == "chủ" : object.conclusion = "I" + object.prevWord2 == "vận" and object.word == "mùa" : object.conclusion = "I" + object.word == "giang" and object.nextTag1 == "B" and object.nextTag2 == "I" : object.conclusion = "I" + object.prevWord2 == "cười" and object.word == "khà" : object.conclusion = "I" + object.prevWord2 == "vùng" and object.prevWord1 == "cao" and object.word == "nguyên" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "h'ly" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "hội" and object.word == "trúc" : object.conclusion = "I" + object.prevWord1 == "biến" and object.word == "đổi" : object.conclusion = "I" + object.prevWord2 == "cục" and 
object.prevWord1 == "tác" and object.word == "chiến" : object.conclusion = "I" + object.prevWord1 == "đức" and object.word == "việt" : object.conclusion = "I" + object.prevWord1 == "cách" and object.nextWord1 == "mộc" : object.conclusion = "I" + object.prevWord1 == "đồng" and object.word == "đăng" : object.conclusion = "I" + object.prevWord1 == "tháp" and object.word == "chàm" : object.conclusion = "I" + object.prevWord1 == "tháp" and object.nextWord1 == "mỹ" : object.conclusion = "B" + object.prevTag1 == "I" and object.word == "thiết" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "sức" and object.word == "mạnh" : object.conclusion = "I" + object.prevWord1 == "thành" and object.word == "phố" : object.conclusion = "I" + object.prevWord2 == "hình" : object.conclusion = "B" + object.word == "thức" and object.nextTag1 == "B" and object.nextTag2 == "I" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "thức" : object.conclusion = "B" + object.prevWord1 == "chương" and object.word == "trình" : object.conclusion = "I" + object.prevWord1 == "thể" and object.word == "chế" : object.conclusion = "I" + object.prevWord1 == "chợ" and object.word == "đệm" : object.conclusion = "I" + object.word == "mai" and object.nextWord1 == "công" : object.conclusion = "I" + object.prevWord2 == "chất" and object.prevWord1 == "độc" and object.word == "hại" : object.conclusion = "I" + object.prevWord1 == "tổng" and object.word == "biên" : object.conclusion = "I" + object.prevWord1 == "thời" and object.word == "gian" : object.conclusion = "I" + object.prevWord1 == "a" and object.word == "xờ" : object.conclusion = "I" + object.prevWord1 == "y" and object.word == "lan" : object.conclusion = "I" + object.word == "thế" and object.nextWord1 == "nhã" : object.conclusion = "I" + object.prevWord1 == "hai" and object.word == "đực" : object.conclusion = "I" + object.prevWord1 == "thủ" and object.word == "tướng" : object.conclusion = "I" + object.prevWord2 == "hội" and object.word == "hè" : object.conclusion = "I" + object.prevWord2 == "nước" and object.prevWord1 == "giải" and object.word == "khát" : object.conclusion = "I" + object.prevWord1 == "nước" and object.word == "sạch" : object.conclusion = "I" + object.prevWord2 == "," and object.prevWord1 == "biển" and object.word == "đông" : object.conclusion = "I" + object.prevWord1 == "mai" and object.word == "công" : object.conclusion = "I" + object.prevWord2 == "kungfu" and object.word == "2" : object.conclusion = "I" + object.prevWord1 == "quyền" and object.word == "anh" : object.conclusion = "I" + object.prevWord1 == "thuế" and object.word == "thu" and object.nextWord1 == "nhập" : object.conclusion = "I" + object.prevWord1 == "biển" and object.word == "hồ" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "xóm" : object.conclusion = "I" + object.prevWord1 == "đồng" and object.word == "hồ" : object.conclusion = "I" + object.prevWord1 == "trà" and object.nextWord1 == "(" : object.conclusion = "I" + object.prevWord1 == "tốc" and object.word == "độ" : object.conclusion = "I" + object.prevWord1 == "nhà" and object.word == "rồng" : object.conclusion = "I" + object.prevWord2 == "đồng" and object.prevWord1 == "tiền" and object.word == "mặt" : object.conclusion = "I" + object.prevWord1 == "tình" and object.word == "cờ" : object.conclusion = "I" + object.prevWord1 == "bến" and object.word == "thuỷ" : object.conclusion = "I" + object.prevWord1 == "điện" and object.word == "nam" : object.conclusion = "I" + 
object.prevWord1 == "hàng" and object.word == "bạc" : object.conclusion = "I" + object.prevWord1 == "đức" and object.word == "minh" : object.conclusion = "I" + object.prevWord1 == "a" and object.word == "roàng" : object.conclusion = "I" + object.word == "hủ" and object.nextWord1 == "-" : object.conclusion = "I" + object.prevWord1 == "phần" and object.word == "lăng" : object.conclusion = "I" + object.word == "duy" and object.nextWord1 == "" : object.conclusion = "I" + object.word == "complex" and object.nextWord1 == "(" : object.conclusion = "I" + object.prevWord2 == "" and object.prevWord1 == "vậy" and object.word == "thì" : object.conclusion = "I" + object.prevWord2 == "bên" and object.word == "đơn" : object.conclusion = "I" + object.word == "nuôi" and object.nextWord1 == "" and object.nextWord2 == "" : object.conclusion = "I" + object.prevWord1 == "hai" and object.word == "thẹn" : object.conclusion = "I" + object.prevWord2 == "nhật" and object.prevWord1 == "bản" and object.word == "b" : object.conclusion = "I" + object.prevWord1 == "hội" and object.nextWord1 == "lần" : object.conclusion = "I" + object.prevWord2 == "sự" and object.prevWord1 == "chủ" and object.word == "trì" : object.conclusion = "I" + object.prevWord1 == "learning" and object.word == "to" : object.conclusion = "I" + object.prevWord1 == "thi" and object.word == "ngôn" : object.conclusion = "I" + object.word == "xơr" : object.conclusion = "I" + object.prevWord2 == "lên" and object.prevWord1 == "cơn" and object.word == "sốt" : object.conclusion = "I" + object.prevWord1 == "buôn" and object.word == "kuôp" : object.conclusion = "I" + object.prevWord1 == "đỗ" and object.word == "hữu" : object.conclusion = "I" + object.word == "hoà" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.prevWord1 == "đăm" and object.word == "b'lon" : object.conclusion = "I" + object.prevWord2 == "lái" and object.prevWord1 == "xe" and object.word == "ôm" : object.conclusion = "I" + object.prevWord1 == "c.h.t.n.thị" : object.conclusion = "I" + object.prevWord2 == "mở" and object.prevWord1 == "đường" and object.word == "bay" : object.conclusion = "I" + object.prevWord2 == "từ" and object.prevWord1 == "nguyên" and object.word == "nhân" : object.conclusion = "I" + object.prevWord1 == "bà" and object.word == "kèo" : object.conclusion = "I" + object.prevWord2 == "xã" and object.word == "tiến" : object.conclusion = "I" + object.nextWord1 == "wầm" : object.conclusion = "I" + object.prevWord1 == "ký" and object.word == "toà" and object.nextWord1 == "soạn" : object.conclusion = "I" + object.prevWord1 == "trúc" and object.word == "sư" : object.conclusion = "I" + object.word == "hoả" and object.nextTag1 == "B" : object.conclusion = "I" + object.word == "sỹ" : object.conclusion = "I" + object.prevWord1 == "huyết" and object.word == "mạch" : object.conclusion = "I" + object.prevWord2 == "tầng" and object.prevWord1 == "đồng" and object.word == "giao" : object.conclusion = "I" + object.prevWord1 == "a" and object.word == "kiệm" : object.conclusion = "I" + object.prevWord2 == "người" and object.prevWord1 == "làm" and object.word == "việc" : object.conclusion = "I" + object.prevWord2 == "thổ" and object.prevWord1 == "sông" and object.word == "hồng" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "hlới" and object.nextTag1 == "B" : object.conclusion = "I" + object.word == "nhật" and object.nextWord2 == "b" : object.conclusion = "I" + object.prevWord2 == "qua" and object.word == "lại" : object.conclusion = "I" + 
object.prevWord2 == "qua" and object.prevWord1 == "đổ" and object.word == "lại" : object.conclusion = "B" + object.prevWord1 == "gia" and object.word == "đình" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "đợ" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "xuân" and object.nextWord1 == "-" : object.conclusion = "I" + object.prevWord1 == "quản" and object.word == "lý" : object.conclusion = "I" + object.prevWord1 == "điều" and object.word == "chỉnh" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "rào" : object.conclusion = "I" + object.prevWord1 == "j." : object.conclusion = "I" + object.word == "hải" and object.nextWord1 == "-" : object.conclusion = "I" + object.prevWord1 == "hữu" and object.word == "trí" : object.conclusion = "I" + object.word == "trí" and object.nextWord2 == "," : object.conclusion = "B" + object.prevWord1 == "a." and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord2 == "ông" and object.prevWord1 == "năm" and object.word == "đàng" : object.conclusion = "I" + object.prevWord2 == "châu" and object.word == "a" : object.conclusion = "I" + object.prevWord2 == "đảng" and object.prevWord1 == "lần" and object.word == "thứ" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tra" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "nghỉ" and object.word == "sức" : object.conclusion = "I" + object.prevWord1 == "đền" and object.word == "lừ" : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.word == "thạnh" : object.conclusion = "I" + object.prevWord1 == "ngọc" and object.word == "luận" : object.conclusion = "I" + object.prevWord1 == "sinh" and object.word == "sống" : object.conclusion = "I" + object.prevWord1 == "thực" and object.word == "tiễn" : object.conclusion = "I" + object.prevWord1 == "cầu" and object.word == "kho" : object.conclusion = "I" + object.prevWord1 == "đèo" and object.word == "sen" : object.conclusion = "I" + object.prevWord1 == "đạo" and object.word == "đức" : object.conclusion = "I" + object.prevWord1 == "nga" and object.word == "mân" : object.conclusion = "I" + object.prevWord1 == "làm" and object.word == "ăn" : object.conclusion = "I" + object.prevWord2 == "hội" and object.prevWord1 == "đảng" and object.word == "lần" : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "nôm" : object.conclusion = "I" + object.prevWord1 == "bản" and object.word == "lý" : object.conclusion = "I" + object.prevWord1 == "bản" and object.word == "lý" and object.nextWord1 == "a" : object.conclusion = "B" + object.prevTag1 == "B" and object.word == "bian" : object.conclusion = "I" + object.word == "dah" and object.nextWord1 == "wen" : object.conclusion = "I" + object.prevWord1 == "đội" and object.word == "cấn" : object.conclusion = "I" + object.prevWord2 == "chị" and object.prevWord1 == "hai" and object.word == "trầm" : object.conclusion = "I" + object.prevWord1 == "tổng" and object.word == "thanh" and object.nextWord1 == "tra" : object.conclusion = "I" + object.word == "hà" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.word == "kiều" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "thiếu" and object.word == "gia" : object.conclusion = "I" + object.prevWord1 == "bỏ" and object.word == "của" and object.nextWord1 == "chạy" : object.conclusion = "I" + object.prevWord1 == "cửa" and object.word == "hàng" : object.conclusion = 
"I" + object.prevTag1 == "I" and object.word == "kiến" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "khai" : object.conclusion = "I" + object.prevWord2 == "" and object.word == "diễm" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "zich" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "thuế" and object.word == "giá" : object.conclusion = "I" + object.prevWord1 == "thuốc" and object.word == "nam" : object.conclusion = "I" + object.prevWord2 == "để" and object.prevWord1 == "trở" and object.word == "lại" : object.conclusion = "I" + object.prevWord2 == "có" and object.prevWord1 == "hạn" and object.word == "ngạch" : object.conclusion = "I" + object.word == "sơn" and object.nextWord1 == "" and object.nextWord2 == "" : object.conclusion = "I" + object.prevWord2 == "lời" and object.word == "đáp" : object.conclusion = "I" + object.nextWord1 == "hyun" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "hương" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord2 == "bỏ" and object.prevWord1 == "của" and object.word == "chạy" : object.conclusion = "I" + object.prevWord1 == "nghĩa" and object.word == "trang" : object.conclusion = "I" + object.prevWord1 == "bác" and object.word == "mười" : object.conclusion = "I" + object.prevWord2 == "đỗ" and object.word == "ngoạn" : object.conclusion = "I" + object.word == "nham" and object.nextTag1 == "B" and object.nextTag2 == "B" : object.conclusion = "I" + object.prevWord1 == "phố" and object.word == "là" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord2 == "y" and object.word == "học" : object.conclusion = "I" + object.word == "ngọc" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.prevWord2 == "nữ" and object.prevWord1 == "tướng" and object.word == "cướp" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "tùng" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "v.bảo" : object.conclusion = "I" + object.prevWord1 == "thể" and object.word == "thao" : object.conclusion = "I" + object.prevWord2 == "hồ" and object.prevWord1 == "chính" and object.word == "vinh" : object.conclusion = "I" + object.prevWord1 == "đồng" and object.word == "bằng" : object.conclusion = "I" + object.prevWord1 == "robin" : object.conclusion = "I" + object.prevWord1 == "bé" and object.word == "n." 
: object.conclusion = "I" + object.word == "minh" and object.nextWord1 == "" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "đán" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "" and object.prevWord1 == "bảo" and object.word == "trung" : object.conclusion = "I" + object.prevWord1 == "dân" and object.word == "làng" : object.conclusion = "I" + object.prevWord2 == "có" and object.word == "thọ" : object.conclusion = "I" + object.prevWord1 == "sự" and object.word == "cố" : object.conclusion = "I" + object.prevWord1 == "căn" and object.word == "cứ" : object.conclusion = "I" + object.prevWord2 == "mặt" and object.word == "biển" : object.conclusion = "I" + object.prevWord1 == "cầu" and object.word == "môn" : object.conclusion = "I" + object.prevWord1 == "thành" and object.word == "sơn" : object.conclusion = "I" + object.prevWord1 == "vận" and object.word == "tải" : object.conclusion = "I" + object.prevWord2 == "huỳnh" and object.word == "mỹ" : object.conclusion = "I" + object.prevWord1 == "tùng" and object.word == "mậu" : object.conclusion = "I" + object.prevWord1 == "xác" and object.word == "định" : object.conclusion = "I" + object.prevWord1 == "vũ" and object.nextWord1 == "bình" : object.conclusion = "I" + object.prevWord1 == "bùi" : object.conclusion = "I" + object.prevWord2 == "sông" : object.conclusion = "B" + object.prevWord2 == "do" and object.prevWord1 == "dự" and object.word == "án" : object.conclusion = "I" + object.prevWord1 == "giải" and object.word == "thích" : object.conclusion = "I" + object.prevWord1 == "sông" and object.word == "bé" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "chính" : object.conclusion = "I" + object.prevWord2 == "thấy" and object.prevWord1 == "bà" and object.word == "con" : object.conclusion = "I" + object.word == "hưng" and object.nextWord1 == "" and object.nextWord2 == "" : object.conclusion = "I" + object.prevWord2 == "lúa" and object.prevWord1 == "hè" and object.word == "thu" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tượng" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "vận" and object.prevWord1 == "chuyển" and object.word == "tượng" : object.conclusion = "B" + object.nextWord1 == "phật" : object.conclusion = "B" + object.prevWord1 == "chạy" and object.word == "lấy" and object.nextWord1 == "người" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "bảo" and object.nextTag1 == "" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "vệ" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "giáo" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "đoàn" and object.nextWord1 == "hài" : object.conclusion = "I" + object.prevWord1 == "mặt" and object.word == "tiền" : object.conclusion = "I" + object.prevWord2 == "đảo" and object.prevWord1 == "ba" and object.word == "bình" : object.conclusion = "I" + object.prevWord1 == "trà" and object.word == "nóc" : object.conclusion = "I" + object.prevWord1 == "mẹ" and object.word == "con" : object.conclusion = "I" + object.prevWord1 == "tay" and object.word == "trái" : object.conclusion = "I" + object.prevWord2 == "lên" and object.prevWord1 == "tiếng" and object.word == "nói" : object.conclusion = "I" + object.prevWord2 == "đường" and object.prevWord1 == "đất" and object.word == "đỏ" : object.conclusion = "I" + object.prevWord2 == "máy" and object.prevWord1 == "bay" and object.word == 
"trực" : object.conclusion = "I" + object.prevWord2 == "thuật" and object.word == "kè" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "sở" : object.conclusion = "I" + object.prevWord1 == "tự" and object.word == "trung" : object.conclusion = "I" + object.prevWord1 == "ông" and object.word == "dầu" : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "I" and object.word == "t." : object.conclusion = "I" + object.prevWord1 == "bình" and object.word == "thường" : object.conclusion = "I" + object.word == "hoàn" and object.nextWord2 == "sđk" : object.conclusion = "I" + object.prevWord1 == "tiêu" and object.word == "huỷ" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "sương" : object.conclusion = "I" + object.prevWord2 == "mất" and object.prevWord1 == "sức" and object.word == "lao" : object.conclusion = "I" + object.prevWord1 == "quan" and object.word == "hành" and object.nextWord1 == "chính" : object.conclusion = "I" + object.prevWord1 == "tết" and object.nextWord1 == "lịch" : object.conclusion = "I" + object.prevWord1 == "công" and object.word == "văn" : object.conclusion = "I" + object.prevWord1 == "fred" : object.conclusion = "I" + object.prevWord1 == "y" and object.word == "xoan" : object.conclusion = "I" + object.prevWord1 == "ea" : object.conclusion = "I" + object.prevWord1 == "mỹ" and object.word == "hưng" : object.conclusion = "I" + object.prevWord2 == "bác" and object.word == "nhị" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "sen" : object.conclusion = "I" + object.prevWord1 == "lực" and object.word == "lượng" : object.conclusion = "I" + object.prevWord2 == "ra" and object.prevWord1 == "hiệu" and object.word == "ứng" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "hiến" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "bò" : object.conclusion = "I" + object.prevWord1 == "vũ" and object.word == "thượng" : object.conclusion = "I" + object.prevWord1 == "quan" and object.word == "quyền" and object.nextWord1 == "lực" : object.conclusion = "I" + object.prevWord1 == "con" and object.word == "công" : object.conclusion = "I" + object.prevWord1 == "a" and object.word == "vương" : object.conclusion = "I" + object.prevWord1 == "thế" and object.word == "nào" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "xây" : object.conclusion = "I" + object.prevWord2 == "chạy" and object.prevWord1 == "lấy" and object.word == "người" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "nguyện" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "thành" and object.word == "lộc" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "cót" : object.conclusion = "I" + object.prevWord1 == "bố" and object.word == "lá" : object.conclusion = "I" + object.prevWord1 == "ông" and object.word == "đội" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "đào" : object.conclusion = "I" + object.prevWord1 == "chính" and object.word == "trị" : object.conclusion = "I" + object.word == "cua" and object.nextWord1 == "bắt" : object.conclusion = "I" + object.word == "tập" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.word == "phương" and object.nextWord1 == "" and object.nextWord2 == "" : object.conclusion = "I" + object.word == "khó" and object.nextWord1 == "ló" : object.conclusion = "I" + object.word == "khé" : object.conclusion = "I" + 
object.prevWord1 == "tâm" and object.word == "sự" : object.conclusion = "I" + object.prevWord1 == "http://www.aafv.org/petition/petition" and object.nextWord1 == ")" : object.conclusion = "I" + object.prevTag2 == "" and object.prevTag1 == "B" and object.word == "bích" : object.conclusion = "I" + object.prevWord1 == "a" and object.word == "giói" : object.conclusion = "I" + object.prevWord1 == "áo" and object.word == "quan" : object.conclusion = "I" + object.prevWord2 == "đêm" and object.word == "trời" : object.conclusion = "I" + object.word == "tháng" and object.nextWord2 == "nga" : object.conclusion = "I" + object.prevWord1 == "hội" and object.word == "điển" : object.conclusion = "I" + object.word == "ló" and object.nextWord1 == "cái" : object.conclusion = "I" + object.prevWord2 == "vĩnh" and object.word == "b" : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "ryan" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "khỉ" : object.conclusion = "I" + object.prevWord2 == "ông" and object.prevWord1 == "ba" and object.word == "đờn" : object.conclusion = "I" + object.prevWord1 == "tình" and object.word == "huống" : object.conclusion = "I" + object.prevWord2 == "tối" and object.word == "om" : object.conclusion = "I" + object.prevWord1 == "trí" and object.word == "nhớ" : object.conclusion = "I" + object.word == "ân" and object.nextWord1 == "" and object.nextWord2 == "" : object.conclusion = "I" + object.prevWord1 == "thương" and object.word == "tín" : object.conclusion = "I" + object.prevWord2 == "thanh" and object.prevWord1 == "xuân" and object.word == "bắc" : object.conclusion = "I" + object.word == "ericsson" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "vô" and object.word == "tâm" : object.conclusion = "I" + object.prevWord1 == "giới" and object.word == "hạn" : object.conclusion = "I" + object.word == "a" and object.nextWord1 == "tho" : object.conclusion = "I" + object.prevWord1 == "bế" and object.nextWord1 == "đàn" : object.conclusion = "I" + object.prevWord2 == "hỏi" and object.word == "hỏi" : object.conclusion = "I" + object.prevWord1 == "ló" and object.nextWord1 == "khôn" : object.conclusion = "I" + object.word == "xèng" : object.conclusion = "I" + object.word == "xuân" and object.nextWord1 == "loan" : object.conclusion = "I" + object.prevWord2 == "rạch" and object.word == "mé" : object.conclusion = "I" + object.prevWord1 == "khánh" and object.word == "ngọc" : object.conclusion = "I" + object.word == "vô" and object.nextWord2 == "bóng" : object.conclusion = "I" + object.prevWord2 == "mười" and object.word == "1917" : object.conclusion = "I" + object.prevWord1 == "tháng" and object.word == "mười" and object.nextWord1 == "nga" : object.conclusion = "I" + object.prevWord2 == "đất" and object.prevWord1 == "màu" and object.word == "mỡ" : object.conclusion = "I" + object.prevWord1 == "đường" and object.word == "dây" : object.conclusion = "I" + object.prevWord1 == "ngất" and object.word == "ngưởng" : object.conclusion = "I" + object.prevWord1 == "ra" and object.word == "vào" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord2 == "bóng" and object.prevWord1 == "đá" and object.word == "châu" : object.conclusion = "I" + object.prevWord1 == "trẻ" and object.nextWord1 == "nhật" : object.conclusion = "I" + object.prevWord2 == "chị" and object.word == "hừng" : object.conclusion = "I" + object.word == "châu" and object.nextWord1 == "" : object.conclusion = "I" + object.prevTag1 == "B" 
and object.word == "loan" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord2 == "an" and object.prevWord1 == "ninh" and object.word == "đông" : object.conclusion = "I" + object.prevWord2 == "đoàn" and object.word == "hài" : object.conclusion = "I" + object.prevWord1 == "thương" and object.word == "yêu" : object.conclusion = "I" + object.word == "cửu" and object.nextWord1 == "long" and object.nextWord2 == "có" : object.conclusion = "I" + object.prevWord2 == "dì" and object.word == "lặt" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "luốc" : object.conclusion = "I" + object.word == "luyến" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord1 == "báo" and object.word == "tử" : object.conclusion = "I" + object.prevWord1 == "khó" and object.word == "dễ" : object.conclusion = "I" + object.prevWord1 == "điều" and object.word == "khoản" : object.conclusion = "I" + object.prevWord2 == "ngày" and object.prevWord1 == "một" and object.word == "số" : object.conclusion = "I" + object.prevWord1 == "điện" and object.word == "phong" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "hoà" and object.word == "bình" and object.nextWord1 == "2004" : object.conclusion = "I" + object.prevWord2 == "nam" and object.prevWord1 == "thời" and object.word == "hội" : object.conclusion = "I" + object.prevWord1 == "làm" and object.word == "thuê" : object.conclusion = "I" + object.word == "sử" and object.nextTag1 == "" and object.nextTag2 == "" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "c.đ." : object.conclusion = "I" + object.prevWord2 == "bắc" and object.word == "chánh" : object.conclusion = "I" + object.prevWord1 == "sinh" and object.word == "nhật" : object.conclusion = "I" + object.prevWord2 == "từ" and object.prevWord1 == "đường" and object.word == "cơ" : object.conclusion = "I" + object.prevWord2 == "có" and object.prevWord1 == "lí" and object.word == "do" : object.conclusion = "I" + object.nextWord1 == "thuột" : object.conclusion = "I" + object.prevWord2 == "có" and object.prevWord1 == "tình" and object.word == "cảm" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "nạ" : object.conclusion = "I" + object.prevWord2 == "vô" and object.word == "bóng" : object.conclusion = "I" + object.prevWord1 == "phạt" and object.nextWord1 == "tiếp" : object.conclusion = "I" + object.prevWord1 == "lập" and object.word == "phúc" : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "reef" : object.conclusion = "I" + object.prevWord1 == "nam" and object.word == "nhất" and object.nextWord1 == "thống" : object.conclusion = "I" + object.prevWord2 == "cống" and object.word == "hàn" : object.conclusion = "I" + object.prevWord2 == "sông" and object.prevWord1 == "ba" and object.word == "hạ" : object.conclusion = "I" + object.prevWord2 == "có" and object.prevWord1 == "hạng" and object.word == "mục" : object.conclusion = "I" + object.prevWord2 == "dì" and object.word == "lượm" : object.conclusion = "I" + object.prevWord2 == "vĩnh" and object.prevWord1 == "thạnh" and object.word == "trung" : object.conclusion = "I" + object.prevWord1 == "bến" and object.word == "bình" and object.nextWord1 == "đông" : object.conclusion = "I" + object.prevWord1 == "bến" and object.nextWord1 == "đình" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "sạn" : object.conclusion = "I" + object.prevWord1 == "cái" and object.word == "khôn" and object.nextWord1 == "," : 
object.conclusion = "I" + object.prevWord2 == "cầm" and object.prevWord1 == "văn" and object.word == "kình" : object.conclusion = "I" + object.prevWord2 == "thành" and object.prevWord1 == "sự" and object.word == "thật" : object.conclusion = "I" + object.prevWord1 == "v." and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "cửa" and object.word == "ông" : object.conclusion = "I" + object.prevWord1 == "trẻ" and object.word == "em" : object.conclusion = "I" + object.prevWord1 == "quang" and object.nextWord1 == "-" : object.conclusion = "I" + object.word == "sp." and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "làm" and object.word == "loạt" : object.conclusion = "I" + object.prevWord1 == "hình" and object.nextWord1 == "nhật" : object.conclusion = "I" + object.prevWord2 == "nhà" and object.prevWord1 == "giáo" and object.word == "dục" : object.conclusion = "I" + object.word == "v." and object.nextWord2 == "," : object.conclusion = "I" + object.prevTag2 == "B" and object.prevTag1 == "B" and object.word == "din" : object.conclusion = "I" + object.prevWord2 == "thống" and object.word == "hồng" : object.conclusion = "I" + object.prevWord1 == "tới" and object.nextWord1 == "lui" : object.conclusion = "I" + object.prevWord1 == "yêu" and object.word == "thương" : object.conclusion = "I" + object.prevWord2 == "mò" and object.prevWord1 == "cua" and object.word == "bắt" : object.conclusion = "I" + object.word == "lực" and object.nextTag1 == "" : object.conclusion = "I" + object.prevWord2 == "nguyễn" and object.word == "h." : object.conclusion = "I" + object.prevWord1 == "chủ" and object.word == "yếu" : object.conclusion = "I" + object.prevWord1 == "cai" and object.word == "nghiện" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "yol" : object.conclusion = "I" + object.prevWord1 == "chân" and object.nextWord1 == "chân" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tá" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "tắp" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "gia" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord1 == "lão" : object.conclusion = "B" + object.prevWord1 == "phan" and object.nextWord1 == "" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "buồm" : object.conclusion = "I" + object.prevWord1 == "hương" and object.word == "thảo" : object.conclusion = "I" + object.word == "modified" and object.nextTag1 == "B" and object.nextTag2 == "B" : object.conclusion = "I" + object.prevWord1 == "y" and object.word == "con" : object.conclusion = "I" + object.prevWord1 == "cửa" and object.word == "dương" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord2 == "''" and object.prevWord1 == "nghe" and object.word == "nói" : object.conclusion = "I" + object.prevWord1 == "tố" and object.word == "oanh" : object.conclusion = "I" + object.prevWord2 == "nguyễn" and object.prevWord1 == "hữu" and object.word == "hạnh" : object.conclusion = "I" + object.prevWord1 == "giới" and object.word == "thiệu" : object.conclusion = "I" + object.prevTag1 == "I" and object.word == "doanh" and object.nextTag1 == "B" : object.conclusion = "I" + object.prevWord2 == "ăn" and object.prevWord1 == "của" and object.word == "để" : object.conclusion = "I" + object.prevWord1 == "châu" and object.word == "mỹ" : object.conclusion = "I" + object.prevWord1 == "trường" and object.word == "lưu" : 
object.conclusion = "I" + object.prevWord2 == "ngành" and object.prevWord1 == "hàng" and object.word == "không" : object.conclusion = "I" + object.prevWord1 == "đồng" and object.word == "mô" and object.nextWord1 == "(" : object.conclusion = "I" + object.prevWord1 == "trước" and object.word == "đây" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "culex" : object.conclusion = "I" + object.prevWord2 == "nam" and object.prevWord1 == "trung" and object.word == "quốc" : object.conclusion = "I" + object.prevWord1 == "ả" and object.word == "rập" : object.conclusion = "I" + object.prevWord1 == "thuốc" and object.word == "tây" : object.conclusion = "I" + object.prevWord2 == ":" and object.prevWord1 == "khu" and object.word == "bảo" : object.conclusion = "I" + object.word == "mơn" and object.nextTag1 == "B" and object.nextTag2 == "B" : object.conclusion = "I" + object.prevWord1 == "tình" and object.nextWord1 == "nghĩa" : object.conclusion = "I" + object.prevWord2 == "của" and object.prevWord1 == "ăn" and object.word == "của" : object.conclusion = "I" + object.prevWord2 == "làm" and object.prevWord1 == "chứng" and object.word == "minh" : object.conclusion = "I" + object.prevWord1 == "cùng" and object.word == "cực" : object.conclusion = "I" + object.prevTag1 == "B" and object.word == "b.fall" : object.conclusion = "I" + object.prevWord1 == "nguyên" and object.nextWord1 == "-" : object.conclusion = "I" + object.prevWord1 == "to" and object.nextWord1 == ")" : object.conclusion = "I" + object.prevWord2 == "cảng" and object.prevWord1 == "ba" and object.word == "cấp" : object.conclusion = "I" + object.prevWord1 == "long" and object.word == "b" : object.conclusion = "I" + object.prevWord1 == "vận" and object.word == "hành" : object.conclusion = "I" + object.prevWord1 == "bác" and object.word == "tôn" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "mẹt" : object.conclusion = "I" + object.prevWord2 == "vào" and object.prevWord1 == "đề" and object.word == "tài" : object.conclusion = "I" + object.prevWord2 == "thành" and object.prevWord1 == "tâm" and object.word == "điểm" : object.conclusion = "I" + object.prevWord2 == "xã" and object.word == "hạ" : object.conclusion = "I" + object.prevWord1 == "chi" and object.word == "trả" : object.conclusion = "I" + object.prevWord1 == "song" and object.word == "phụng" : object.conclusion = "I" + object.word == "đi" and object.nextWord1 == "tìm" and object.nextWord2 == "đường" : object.conclusion = "I" + object.prevWord1 == "y" and object.word == "mi" : object.conclusion = "I" + object.prevWord1 == "bộ" and object.word == "đội" : object.conclusion = "I" + object.nextTag1 == "B" and object.nextTag2 == "I" : object.conclusion = "B" + object.prevWord2 == "hết" and object.word == "mắt" : object.conclusion = "I" + object.prevWord2 == "tới" and object.word == "lui" : object.conclusion = "I" + object.prevWord1 == "hai" and object.word == "long" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "bàng" : object.conclusion = "I" + object.prevWord1 == "cửa" and object.word == "tùng" : object.conclusion = "I" + object.prevWord2 == "ông" and object.prevWord1 == "từ" and object.word == "chối" : object.conclusion = "I" + object.prevWord1 == "ô" and object.word == "chợ" and object.nextWord1 == "dừa" : object.conclusion = "I" + object.word == "toàn" and object.nextWord1 == "tập" and object.nextWord2 == "," : object.conclusion = "I" + object.prevWord1 == "vàng" and object.word == "vàng" : object.conclusion = "I" + 
object.prevWord1 == "không" and object.word == "khí" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "liêm" : object.conclusion = "I" + object.prevWord2 == "bệnh" and object.prevWord1 == "viêm" and object.word == "não" : object.conclusion = "I" + object.prevWord1 == "viêm" and object.word == "não" and object.nextWord1 == "siêu" : object.conclusion = "B" + object.prevWord2 == "người" and object.prevWord1 == "thân" and object.word == "quen" : object.conclusion = "I" + object.prevWord1 == "nhất" and object.word == "a" : object.conclusion = "I" + object.prevWord2 == "vua" and object.word == "thái" : object.conclusion = "I" + object.prevWord1 == "thế" and object.word == "giới" : object.conclusion = "I" + object.prevWord2 == "" and object.prevWord1 == "nào" and object.word == "là" : object.conclusion = "I" + object.word == "tới" and object.nextWord2 == "lui" : object.conclusion = "I" + object.prevWord1 == "tết" and object.word == "tây" : object.conclusion = "I" + object.prevWord2 == "tân" and object.word == "sanh" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "mã" : object.conclusion = "I" + object.prevWord1 == "modified" : object.conclusion = "I" + object.word == "na" and object.nextWord1 == "," : object.conclusion = "I" + object.prevWord1 == "tiền" and object.word == "đề" : object.conclusion = "I" + object.prevWord1 == "nam" and object.word == "thời" and object.nextWord1 == "hội" : object.conclusion = "I" + object.prevWord1 == "tây" and object.word == "bắc" and object.nextWord1 == "-" : object.conclusion = "I" + object.word == "ăn" and object.nextWord1 == "của" and object.nextWord2 == "để" : object.conclusion = "I" + object.prevWord1 == "quân" and object.word == "đội" : object.conclusion = "I" + object.prevWord2 == "tôn" and object.prevWord1 == "thất" and object.word == "lập" : object.conclusion = "I" + object.prevWord2 == "hay" and object.prevWord1 == "biết" and object.word == "mấy" : object.conclusion = "I" + object.prevWord1 == "chợ" and object.word == "dừa" : object.conclusion = "I" + object.prevWord2 == "cầu" and object.prevWord1 == "nối" and object.word == "tiếp" : object.conclusion = "I" + object.prevWord1 == "sông" and object.word == "đốc" : object.conclusion = "I" + object.prevWord2 == "-" and object.prevWord1 == "đông" and object.word == "nam" : object.conclusion = "I" + object.word == "an" and object.nextWord1 == "(" : object.conclusion = "I" + object.prevWord1 == "hồ" and object.word == "thanh" and object.nextWord1 == "sơn" : object.conclusion = "I" + object.prevWord1 == "chính" and object.word == "gián" : object.conclusion = "I" + object.prevWord1 == "bến" and object.word == "đình" : object.conclusion = "I" + object.prevWord1 == "sông" and object.nextWord1 == "hạ" : object.conclusion = "I" + object.prevWord2 == "ra" and object.prevWord1 == "mặt" and object.word == "đường" : object.conclusion = "I" + object.prevWord2 == "đi" and object.prevWord1 == "tính" and object.word == "lại" : object.conclusion = "I" + object.prevWord1 == "nhà" and object.word == "trắng" : object.conclusion = "I" + object.prevWord1 == "á" and object.word == "đông" : object.conclusion = "I" + object.prevWord1 == "tàu" and object.nextWord1 == "ngầm" : object.conclusion = "I" + object.prevWord1 == "sông" and object.word == "cái" : object.conclusion = "I" + object.prevWord2 == "tàu" and object.word == "ngầm" : object.conclusion = "I" + object.prevWord1 == "đi" and object.word == "tính" and object.nextWord1 == "lại" : object.conclusion = "I" + object.prevWord1 == 
"cùng" and object.word == "với" and object.nextWord1 == "hệ" : object.conclusion = "I" + object.prevWord1 == "năm" and object.word == "hương" : object.conclusion = "I" + object.prevWord1 == "chắc" and object.word == "băng" : object.conclusion = "I" + object.prevWord1 == "viện" and object.word == "phí" : object.conclusion = "I" + object.prevWord1 == "yêu" and object.word == "cầu" : object.conclusion = "I" + object.prevWord1 == "phục" and object.word == "vụ" : object.conclusion = "I" + object.prevWord1 == "thời" and object.word == "vụ" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "đồn" : object.conclusion = "I" + object.prevWord1 == "bích" and object.word == "dậu" : object.conclusion = "I" + object.prevWord1 == "cam" and object.word == "ly" : object.conclusion = "I" + object.prevWord2 == "tăng" and object.prevWord1 == "trọng" and object.word == "lượng" : object.conclusion = "I" + object.prevWord1 == "sĩ" and object.word == "quan" : object.conclusion = "I" + object.prevWord1 == "hàng" and object.word == "đường" : object.conclusion = "I" + object.word == "a" and object.nextWord2 == "huyện" : object.conclusion = "I" + object.word == "đi" and object.nextWord1 == "tính" and object.nextWord2 == "lại" : object.conclusion = "I" + object.prevWord1 == "hỏi" and object.nextWord1 == "hỏi" : object.conclusion = "I" + object.prevWord1 == "phận" and object.word == "sự" : object.conclusion = "I" + object.prevWord1 == "học" and object.word == "vấn" : object.conclusion = "I" + object.prevWord2 == "xe" and object.prevWord1 == "ra" and object.word == "vào" : object.conclusion = "I" + object.prevWord1 == "gió" and object.word == "lào" : object.conclusion = "I" + object.prevWord1 == "biển" and object.word == "đông" and object.nextWord1 == "." : object.conclusion = "I" + object.prevWord2 == "cầu" and object.prevWord1 == "mỹ" and object.word == "thanh" : object.conclusion = "I" + object.prevWord1 == "cho" and object.word == "biết" and object.nextWord1 == "quan" : object.conclusion = "I" + object.prevWord2 == "các" and object.prevWord1 == "làng" and object.word == "quê" : object.conclusion = "I" + object.prevWord1 == "ba" and object.word == "thọ" : object.conclusion = "I" + object.prevWord1 == "năm" and object.word == "độ" : object.conclusion = "I" + object.prevWord2 == "cá" and object.prevWord1 == "cửa" and object.word == "đại" : object.conclusion = "I" + object.prevWord2 == "không" and object.prevWord1 == "trung" and object.word == "thực" : object.conclusion = "I" diff --git a/VnCoreNLP/pom.xml b/VnCoreNLP/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..0bf5ed6ba7016ef15a8e075c49e636bfc3cb7798 --- /dev/null +++ b/VnCoreNLP/pom.xml @@ -0,0 +1,103 @@ + + + 4.0.0 + + VnCoreNLP + VnCoreNLP + 1.2 + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.3 + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-shade-plugin + 3.1.0 + + + package + + shade + + + false + + + + vn.pipeline.VnCoreNLP + + + + + + + + + + + UTF-8 + + + + + + com.optimaize.languagedetector + language-detector + 0.6 + + + + vncorenlp + marmot + 1.0 + + + + edu.emory.mathcs.nlp + nlp4j-api + 1.1.3 + + + + log4j + log4j + 1.2.17 + + + + org.slf4j + slf4j-log4j12 + 1.7.5 + + + + + org.slf4j + slf4j-api + 1.7.25 + + + + + + + vncorenlp + vncorenlp thirdparty repo + https://github.com/vncorenlp/thirdparty/raw/repository/ + + true + always + + + + + diff --git a/VnCoreNLP/src/main/java/vn/corenlp/ner/NerRecognizer.java b/VnCoreNLP/src/main/java/vn/corenlp/ner/NerRecognizer.java new file mode 
100644 index 0000000000000000000000000000000000000000..ab74dfa6c70c4f509dd2ee0cac1c7dbd433bceea --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/ner/NerRecognizer.java @@ -0,0 +1,85 @@ +package vn.corenlp.ner; + +import edu.emory.mathcs.nlp.common.util.NLPUtils; +import edu.emory.mathcs.nlp.component.template.NLPComponent; + +import edu.emory.mathcs.nlp.component.template.lexicon.GlobalLexica; +import edu.emory.mathcs.nlp.component.template.node.FeatMap; +import edu.emory.mathcs.nlp.component.template.node.NLPNode; +import edu.emory.mathcs.nlp.decode.NLPDecoder; +import org.apache.log4j.Logger; +import vn.corenlp.wordsegmenter.Vocabulary; +import vn.pipeline.LexicalInitializer; +import vn.pipeline.Word; +import vn.pipeline.Utils; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class NerRecognizer { + private NLPDecoder nlpDecoder ; + public final static Logger LOGGER = Logger.getLogger(NerRecognizer.class); + private static NerRecognizer nerRecognizer; + public static NerRecognizer initialize() throws IOException{ + if(nerRecognizer == null) { + nerRecognizer = new NerRecognizer(); + } + return nerRecognizer; + } + + + public NerRecognizer() throws IOException{ + LOGGER.info("Loading NER model"); + nlpDecoder = new NLPDecoder(); + List> components = new ArrayList(); + + String modelPath = Utils.jarDir + "/models/ner/vi-ner.xz"; + if (!new File(modelPath).exists()) throw new IOException("NerRecognizer: " + modelPath + " is not found!"); + GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica(); + if(lexica != null) { + components.add(lexica); + } + components.add(NLPUtils.getComponent(modelPath)); + nlpDecoder.setComponents(components); + + } + + + public void tagSentence(List sentenceWords) { + NLPNode[] decodedNodes = nlpDecoder.decode(toNodeArray(sentenceWords)); + for(int i = 0; i < sentenceWords.size(); i++) { + Word word = sentenceWords.get(i); + word.setNerLabel(decodedNodes[i + 1].getNamedEntityTag().replace("U-", "B-").replace("L-", "I-")); + } + } + + private NLPNode[] toNodeArray(List sentenceWords) { + NLPNode[] nlpNodes = new NLPNode[sentenceWords.size() + 1]; + nlpNodes[0] = new NLPNode(); + for(int i = 0; i < sentenceWords.size(); i++) { + Word word = sentenceWords.get(i); + nlpNodes[i + 1] = new NLPNode(word.getIndex(), word.getForm(), word.getForm(), addLabelForPOSTag(word), new FeatMap()); + + } + return nlpNodes; + } + + public String addLabelForPOSTag(Word word) { + String[] tokens = word.getForm().split("_"); + String output = word.getPosTag(); + if (word.getPosTag() != null && word.getPosTag().equals("Np")) { + if (Vocabulary.VN_FAMILY_NAMES.contains(tokens[0].toLowerCase()) + || (tokens.length > 1 && Vocabulary.VN_MIDDLE_NAMES.contains(tokens[1].toLowerCase()))) + output = word.getPosTag() + "-1"; + else output = word.getPosTag() + "-0"; + } + return output; + } + + public static void main(String[] args) { + + + } +} diff --git a/VnCoreNLP/src/main/java/vn/corenlp/parser/DependencyParser.java b/VnCoreNLP/src/main/java/vn/corenlp/parser/DependencyParser.java new file mode 100644 index 0000000000000000000000000000000000000000..488a42a06a45f002657c789626d77eff3a2c9072 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/parser/DependencyParser.java @@ -0,0 +1,74 @@ +package vn.corenlp.parser; + +import edu.emory.mathcs.nlp.common.util.NLPUtils; +import edu.emory.mathcs.nlp.component.template.NLPComponent; + +import edu.emory.mathcs.nlp.component.template.lexicon.GlobalLexica; 
+import edu.emory.mathcs.nlp.component.template.node.FeatMap; +import edu.emory.mathcs.nlp.component.template.node.NLPNode; +import edu.emory.mathcs.nlp.decode.NLPDecoder; +import org.apache.log4j.Logger; +import vn.pipeline.LexicalInitializer; +import vn.pipeline.Word; +import vn.pipeline.Utils; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class DependencyParser { + private NLPDecoder nlpDecoder ; + public final static Logger LOGGER = Logger.getLogger(DependencyParser.class); + private static DependencyParser dependencyParser; + public static DependencyParser initialize() throws IOException { + if(dependencyParser == null) { + dependencyParser = new DependencyParser(); + } + return dependencyParser; + } + + public DependencyParser() throws IOException { + LOGGER.info("Loading Dependency Parsing model"); + nlpDecoder = new NLPDecoder(); + List> components = new ArrayList(); + + String modelPath = Utils.jarDir + "/models/dep/vi-dep.xz"; + if (!new File(modelPath).exists()) throw new IOException("DependencyParser: " + modelPath + " is not found!"); + GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica(); + if(lexica != null) { + components.add(lexica); + } + components.add(NLPUtils.getComponent(modelPath)); + nlpDecoder.setComponents(components); + + } + + public void tagSentence(List sentenceWords) { + NLPNode[] decodedNodes = nlpDecoder.decode(toNodeArray(sentenceWords)); + for(int i = 0; i < sentenceWords.size(); i++) { + Word word = sentenceWords.get(i); + word.setHead(decodedNodes[i + 1].getDependencyHead().getID()); + word.setDepLabel(decodedNodes[i + 1].getDependencyLabel()); + if(word.getPosTag() != null && word.getPosTag().equals("CH")) word.setDepLabel("punct"); + } + } + + private NLPNode[] toNodeArray(List sentenceWords) { + NLPNode[] nlpNodes = new NLPNode[sentenceWords.size() + 1]; + nlpNodes[0] = new NLPNode(); + for(int i = 0; i < sentenceWords.size(); i++) { + Word word = sentenceWords.get(i); + //int id, String form, String lemma, String posTag, FeatMap feats + nlpNodes[i + 1] = new NLPNode(word.getIndex(), word.getForm(), word.getForm(), + word.getPosTag(), new FeatMap()); + + } + return nlpNodes; + } + + public static void main(String[] args) { + + + } +} diff --git a/VnCoreNLP/src/main/java/vn/corenlp/postagger/PosTagger.java b/VnCoreNLP/src/main/java/vn/corenlp/postagger/PosTagger.java new file mode 100644 index 0000000000000000000000000000000000000000..231d3f268dc71b9526c64757ec2a92c834c5d872 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/postagger/PosTagger.java @@ -0,0 +1,65 @@ +package vn.corenlp.postagger; + +import marmot.morph.MorphTagger; +import marmot.morph.Sentence; +import marmot.morph.Word; + +import marmot.util.FileUtils; +import org.apache.log4j.Logger; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +import vn.pipeline.Utils; + +public class PosTagger { + private static PosTagger posTagger = null; + private MorphTagger tagger; + public final static Logger LOGGER = Logger.getLogger(PosTagger.class); + public PosTagger() throws IOException { + LOGGER.info("Loading POS Tagging model"); + String modelPath = Utils.jarDir + "/models/postagger/vi-tagger"; + if (!new File(modelPath).exists()) throw new IOException("PosTagger: " + modelPath + " is not found!"); + tagger = FileUtils.loadFromFile(modelPath); + + } + + public static PosTagger initialize() throws IOException { + 
if(posTagger == null) { + posTagger = new PosTagger(); + } + return posTagger; + } + + public List tagSentence(String sentence) throws IOException { + List output = new ArrayList<>(); + String line = sentence.trim(); + if (line.length() == 0) { + return output; + } + String[] tokenstrs = line.split(" "); + LinkedList tokens = new LinkedList(); + + for(int i = 0; i < tokenstrs.length; ++i) { + if (!tokenstrs[i].isEmpty()) { + Word word = new Word(tokenstrs[i]); + tokens.add(word); + } + } + + Sentence marmotSentence = new Sentence(tokens); + Object lemma_tags = tagger.tagWithLemma(marmotSentence); + for(int i = 0; i < marmotSentence.size(); ++i) { + List token_lemma_tags = (List)((List)lemma_tags).get(i); + vn.pipeline.Word word = new vn.pipeline.Word((i + 1), marmotSentence.getWord(i).getWordForm(), (String)token_lemma_tags.get(1)); + output.add(word); + + } + return output; + } + + +} diff --git a/VnCoreNLP/src/main/java/vn/corenlp/tokenizer/StringUtils.java b/VnCoreNLP/src/main/java/vn/corenlp/tokenizer/StringUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..a5726a38288b3cfa6da2d8f2e0a5f13a328ac130 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/tokenizer/StringUtils.java @@ -0,0 +1,207 @@ +package vn.corenlp.tokenizer; + +import java.util.HashSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class StringUtils +{ + + public static void testFoundByRegex(String s, String regex) + { + System.out.println("Test string: " + s); + + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(s); + if (matcher.find()) { + System.out.println(s.substring(0, matcher.start())); + System.out.println(s.substring(matcher.start(), matcher.end())); + System.out.println(s.substring(matcher.end())); + } + } + + public static String char2Hex(Character c) + { + return String.format("\\u%04x", (int) c); + } + + public static Character hex2Char(String hex) + { + int hexToInt = Integer.parseInt(hex.substring(2), 16); + return (char) hexToInt; + } + + public static boolean hasPunctuation(String s) + { + for (int i = 0; i < s.length(); i++) { + if (!Character.isLetterOrDigit(s.charAt(i))) + return true; + } + + return false; + } + + public static boolean isPunctuation(String s) + { + for (int i = 0; i < s.length(); i++) { + if (Character.isLetterOrDigit(s.charAt(i))) + return false; + } + + return true; + } + + public static boolean isNumeric(String s) { + return s != null && s.matches("[-+]?\\d*\\.?\\d+"); + } + + // Modified by Dat Quoc Nguyen + public static boolean isBrace(String string) + { + if (string.equals("”") || string.equals("�") || string.equals("'") || string.equals(")") + || string.equals("}") || string.equals("]")) { + return true; + } + return false; + } + + public static HashSet VN_abbreviation; + public static HashSet VN_exception; + static { + VN_abbreviation = new HashSet(); + VN_exception = new HashSet(); + + VN_abbreviation.add("M.City"); + VN_abbreviation.add("V.I.P"); + VN_abbreviation.add("PGS.Ts"); + VN_abbreviation.add("MRS."); + VN_abbreviation.add("Mrs."); + VN_abbreviation.add("Man.United"); + VN_abbreviation.add("Mr."); + VN_abbreviation.add("SHB.ĐN"); + VN_abbreviation.add("Gs.Bs"); + VN_abbreviation.add("U.S.A"); + VN_abbreviation.add("TMN.CSG"); + VN_abbreviation.add("Kts.Ts"); + VN_abbreviation.add("R.Madrid"); + VN_abbreviation.add("Tp."); + VN_abbreviation.add("T.Ư"); + VN_abbreviation.add("D.C"); + VN_abbreviation.add("Gs.Tskh"); + VN_abbreviation.add("PGS.KTS"); + 
VN_abbreviation.add("GS.BS"); + VN_abbreviation.add("KTS.TS"); + VN_abbreviation.add("PGS-TS"); + VN_abbreviation.add("Co."); + VN_abbreviation.add("S.H.E"); + VN_abbreviation.add("Ths.Bs"); + VN_abbreviation.add("T&T.HN"); + VN_abbreviation.add("MR."); + VN_abbreviation.add("Ms."); + VN_abbreviation.add("T.T.P"); + VN_abbreviation.add("TT."); + VN_abbreviation.add("TP."); + VN_abbreviation.add("ĐH.QGHN"); + VN_abbreviation.add("Gs.Kts"); + VN_abbreviation.add("Man.Utd"); + VN_abbreviation.add("GD-ĐT"); + VN_abbreviation.add("T.W"); + VN_abbreviation.add("Corp."); + VN_abbreviation.add("ĐT.LA"); + VN_abbreviation.add("Dr."); + VN_abbreviation.add("T&T"); + VN_abbreviation.add("HN.ACB"); + VN_abbreviation.add("GS.KTS"); + VN_abbreviation.add("MS."); + VN_abbreviation.add("Prof."); + VN_abbreviation.add("GS.TS"); + VN_abbreviation.add("PGs.Ts"); + VN_abbreviation.add("PGS.BS"); + VN_abbreviation.add("BT."); + VN_abbreviation.add("Ltd."); + VN_abbreviation.add("ThS.BS"); + VN_abbreviation.add("Gs.Ts"); + VN_abbreviation.add("SL.NA"); + //VN_abbreviation.add("P."); + VN_abbreviation.add("Th.S"); + VN_abbreviation.add("Gs.Vs"); + VN_abbreviation.add("PGs.Bs"); + VN_abbreviation.add("T.O.P"); + VN_abbreviation.add("PGS.TS"); + VN_abbreviation.add("HN.T&T"); + VN_abbreviation.add("SG.XT"); + VN_abbreviation.add("O.T.C"); + VN_abbreviation.add("TS.BS"); + VN_abbreviation.add("Yahoo!"); + VN_abbreviation.add("Man.City"); + VN_abbreviation.add("MISS."); + VN_abbreviation.add("HA.GL"); + VN_abbreviation.add("GS.Ts"); + VN_abbreviation.add("TBT."); + VN_abbreviation.add("GS.VS"); + VN_abbreviation.add("GS.TSKH"); + VN_abbreviation.add("Ts.Bs"); + VN_abbreviation.add("M.U"); + VN_abbreviation.add("Gs.TSKH"); + VN_abbreviation.add("U.S"); + VN_abbreviation.add("Miss."); + VN_abbreviation.add("GD.ĐT"); + VN_abbreviation.add("PGs.Kts"); + //VN_abbreviation.add("Q."); + VN_abbreviation.add("St."); + VN_abbreviation.add("Ng."); + VN_abbreviation.add("Inc."); + VN_abbreviation.add("Th."); + VN_abbreviation.add("N.O.V.A"); + + VN_exception.add("Wi-fi"); + VN_exception.add("17+"); + VN_exception.add("km/h"); + VN_exception.add("M7"); + VN_exception.add("M8"); + VN_exception.add("21+"); + VN_exception.add("G3"); + VN_exception.add("M9"); + VN_exception.add("G4"); + VN_exception.add("km3"); + VN_exception.add("m/s"); + VN_exception.add("km2"); + VN_exception.add("5g"); + VN_exception.add("4G"); + VN_exception.add("8K"); + VN_exception.add("3g"); + VN_exception.add("E9"); + VN_exception.add("U21"); + VN_exception.add("4K"); + VN_exception.add("U23"); + VN_exception.add("Z1"); + VN_exception.add("Z2"); + VN_exception.add("Z3"); + VN_exception.add("Z4"); + VN_exception.add("Z5"); + VN_exception.add("Jong-un"); + VN_exception.add("u19"); + VN_exception.add("5s"); + VN_exception.add("wi-fi"); + VN_exception.add("18+"); + VN_exception.add("Wi-Fi"); + VN_exception.add("m2"); + VN_exception.add("16+"); + VN_exception.add("m3"); + VN_exception.add("V-League"); + VN_exception.add("Geun-hye"); + VN_exception.add("5G"); + VN_exception.add("4g"); + VN_exception.add("Z3+"); + VN_exception.add("3G"); + VN_exception.add("km/s"); + VN_exception.add("6+"); + VN_exception.add("u21"); + VN_exception.add("WI-FI"); + VN_exception.add("u23"); + VN_exception.add("U19"); + VN_exception.add("6s"); + VN_exception.add("4s"); + } + +} diff --git a/VnCoreNLP/src/main/java/vn/corenlp/tokenizer/Tokenizer.java b/VnCoreNLP/src/main/java/vn/corenlp/tokenizer/Tokenizer.java new file mode 100644 index 
0000000000000000000000000000000000000000..dd95f058c8c91529aebb3c9126d353cd0fa009b2 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/tokenizer/Tokenizer.java @@ -0,0 +1,397 @@ +package vn.corenlp.tokenizer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +/** + * This class contains methods used for tokenization step. + * + * @author tuanphong94 + * @link https://github.com/phongnt570/UETsegmenter/blob/master/src/vn/edu/vnu/uet/nlp/tokenizer/tokenizer.tokenizer.java + * + */ +public class Tokenizer { + /** + * @param s + * @return List of tokens from s + * @throws IOException + */ + public static List tokenize(String s) throws IOException { + if (s == null || s.trim().isEmpty()) { + return new ArrayList(); + } + + String[] tempTokens = s.trim().split("\\s+"); + if (tempTokens.length == 0) { + return new ArrayList(); + } + + List tokens = new ArrayList(); + + for (String token : tempTokens) { + if (token.length() == 1 || !StringUtils.hasPunctuation(token)) { + tokens.add(token); + continue; + } + + if (token.endsWith(",")) { + tokens.addAll(tokenize(token.substring(0, token.length() - 1))); + tokens.add(","); + continue; + } + + if (StringUtils.VN_abbreviation.contains(token)) { + tokens.add(token); + continue; + } + + + if (token.endsWith(".") && Character.isAlphabetic(token.charAt(token.length() - 2))) { + if ((token.length() == 2 && Character.isUpperCase(token.charAt(token.length() - 2))) || (Pattern.compile(Regex.SHORT_NAME).matcher(token).find())) { + tokens.add(token); + continue; + } + tokens.addAll(tokenize(token.substring(0, token.length() - 1))); + tokens.add("."); + continue; + } + + if (StringUtils.VN_exception.contains(token)) { + tokens.add(token); + continue; + } + + boolean tokenContainsAbb = false; + for (String e : StringUtils.VN_abbreviation) { + int i = token.indexOf(e); + if (i < 0) + continue; + + tokenContainsAbb = true; + tokens = recursive(tokens, token, i, i + e.length()); + break; + } + if (tokenContainsAbb) + continue; + + boolean tokenContainsExp = false; + for (String e : StringUtils.VN_exception) { + int i = token.indexOf(e); + if (i < 0) + continue; + + tokenContainsExp = true; + tokens = recursive(tokens, token, i, i + e.length()); + break; + } + if (tokenContainsExp) + continue; + + List regexes = Regex.getRegexList(); + + boolean matching = false; + for (String regex : regexes) { + if (token.matches(regex)) { + tokens.add(token); + matching = true; + break; + } + } + if (matching) { + continue; + } + + for (int i = 0; i < regexes.size(); i++) { + Pattern pattern = Pattern.compile(regexes.get(i)); + Matcher matcher = pattern.matcher(token); + + if (matcher.find()) { + if (i == Regex.getRegexIndex("url")) { + String[] elements = token.split(Pattern.quote(".")); + boolean hasURL = true; + for (String ele : elements) { + if (ele.length() == 1 && Character.isUpperCase(ele.charAt(0))) { + hasURL = false; + break; + } + for (int j = 0; j < ele.length(); j++) { + if (ele.charAt(j) >= 128) { + hasURL = false; + break; + } + } + } + if (hasURL) { + tokens = recursive(tokens, token, matcher.start(), matcher.end()); + } else { + continue; + } + } + + else if (i == Regex.getRegexIndex("month")) { + int start = matcher.start(); + + boolean hasLetter = false; + + for (int j = 0; j < start; j++) { + if (Character.isLetter(token.charAt(j))) { + tokens = recursive(tokens, token, matcher.start(), matcher.end()); + hasLetter = true; + 
break; + } + } + + if (!hasLetter) { + tokens.add(token); + } + } + + else { + tokens = recursive(tokens, token, matcher.start(), matcher.end()); + } + + matching = true; + break; + } + } + + if (matching) + continue; + else + tokens.add(token); + } + + return tokens; + } + + private static List recursive(List tokens, String token, int beginMatch, int endMatch) + throws IOException { + if (beginMatch > 0) + tokens.addAll(tokenize(token.substring(0, beginMatch))); + tokens.addAll(tokenize(token.substring(beginMatch, endMatch))); + + if (endMatch < token.length()) + tokens.addAll(tokenize(token.substring(endMatch))); + + return tokens; + } + + public static List joinSentences(List tokens) { + List sentences = new ArrayList<>(); + + List sentence = new ArrayList<>(); + for (int i = 0; i < tokens.size(); i++) { + String token = tokens.get(i); + String nextToken = null; + if (i != tokens.size() - 1) { + nextToken = tokens.get(i + 1); + } + String beforeToken = null; + if (i > 0) { + beforeToken = tokens.get(i - 1); + } + + sentence.add(token); + + if (i == tokens.size() - 1) { + sentences.add(joinSentence(sentence)); + return sentences; + } + + if (i < tokens.size() - 2 && token.equals(StringConst.COLON)) { + if (Character.isDigit(nextToken.charAt(0)) && tokens.get(i + 2).equals(StringConst.STOP) + || tokens.get(i + 2).equals(StringConst.COMMA)) { + sentences.add(joinSentence(sentence)); + sentence.clear(); + continue; + } + } + + if (token.matches(Regex.EOS_PUNCTUATION)) { + + // Added by Dat Quoc Nguyen + if (nextToken.equals("\"") || nextToken.equals("''")) { + int count = 0; + for (String senToken : sentence) { + if (senToken.equals("\"") || senToken.equals("''")) + count += 1; + } + if (count % 2 == 1) + continue; + } + + // If the current sentence is in the quote or in the brace + if (StringUtils.isBrace(nextToken) || nextToken.isEmpty() || Character.isLowerCase(nextToken.charAt(0)) + || nextToken.equals(StringConst.COMMA) || Character.isDigit(nextToken.charAt(0))) { + continue; + } + + // Sentence starts with its order number + if (sentence.size() == 2 && token.equals(StringConst.STOP)) { + if (Character.isDigit(beforeToken.charAt(0))) { + continue; + } + if (Character.isLowerCase(beforeToken.charAt(0))) { + continue; + } + if (Character.isUpperCase(beforeToken.charAt(0))) { + if (beforeToken.length() == 1) { + continue; + } + } + } + + sentences.add(joinSentence(sentence)); + sentence.clear(); + } + } + + return sentences; + } + + public static String joinSentence(List tokens) { + StringBuffer sent = new StringBuffer(); + int length = tokens.size(); + String token; + for (int i = 0; i < length; i++) { + token = tokens.get(i); + if (token.isEmpty() || token == null || token.equals(StringConst.SPACE)) { + continue; + } + sent.append(token); + if (i < length - 1) + sent.append(StringConst.SPACE); + } + return sent.toString().trim(); + } +} + +interface StringConst +{ + public static final String BOS = ""; + public static final String EOS = ""; + + public static final String SPACE = " "; + public static final String COMMA = ","; + public static final String STOP = "."; + public static final String COLON = ":"; + public static final String UNDERSCORE = "_"; +} + +class Regex +{ + + public static final String ELLIPSIS = "\\.{2,}"; + + public static final String EMAIL = "([\\w\\d_\\.-]+)@(([\\d\\w-]+)\\.)*([\\d\\w-]+)"; + + public static final String FULL_DATE = "(0?[1-9]|[12][0-9]|3[01])(\\/|-|\\.)(1[0-2]|(0?[1-9]))((\\/|-|\\.)\\d{4})"; + + public static final String MONTH = 
"(1[0-2]|(0?[1-9]))(\\/)\\d{4}"; + + public static final String DATE = "(0?[1-9]|[12][0-9]|3[01])(\\/)(1[0-2]|(0?[1-9]))"; + + public static final String TIME = "(\\d\\d:\\d\\d:\\d\\d)|((0?\\d|1\\d|2[0-3])(:|h)(0?\\d|[1-5]\\d)(’|'|p|ph)?)"; + + public static final String MONEY = "\\p{Sc}\\d+([\\.,]\\d+)*|\\d+([\\.,]\\d+)*\\p{Sc}"; + + public static final String PHONE_NUMBER = "(\\(?\\+\\d{1,2}\\)?[\\s\\.-]?)?\\d{2,}[\\s\\.-]?\\d{3,}[\\s\\.-]?\\d{3,}"; + + public static final String URL = "(((https?|ftp):\\/\\/|www\\.)[^\\s/$.?#].[^\\s]*)|(https?:\\/\\/)?(www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%_\\+.~#?&//=]*)"; + + public static final String NUMBER = "[-+]?\\d+([\\.,]\\d+)*%?\\p{Sc}?"; + + public static final String PUNCTUATION = ",|\\.|:|\\?|!|;|-|_|\"|'|“|”|\\||\\(|\\)|\\[|\\]|\\{|\\}|⟨|⟩|«|»|\\\\|\\/|\\‘|\\’|\\“|\\â€�|…|…|‘|’|·"; + + public static final String SPECIAL_CHAR = "\\~|\\@|\\#|\\^|\\&|\\*|\\+|\\-|\\–|<|>|\\|"; + + public static final String EOS_PUNCTUATION = "(\\.+|\\?|!|…)"; + + public static final String NUMBERS_EXPRESSION = NUMBER + "([\\+\\-\\*\\/]" + NUMBER + ")*"; + + public static final String SHORT_NAME = "([\\p{L}]+([\\.\\-][\\p{L}]+)+)|([\\p{L}]+-\\d+)"; + + public static final String WORD_WITH_HYPHEN = "\\p{L}+-\\p{L}+(-\\p{L}+)*"; + + public static final String ALLCAP = "[A-Z]+\\.[A-Z]+"; + + private static List regexes = null; + + private static List regexIndex = null; + + public static List getRegexList() + { + if (regexes == null) { + regexes = new ArrayList(); + regexIndex = new ArrayList(); + + regexes.add(ELLIPSIS); + regexIndex.add("ELLIPSIS"); + + regexes.add(EMAIL); + regexIndex.add("EMAIL"); + + regexes.add(URL); + regexIndex.add("URL"); + + regexes.add(FULL_DATE); + regexIndex.add("FULL_DATE"); + + regexes.add(MONTH); + regexIndex.add("MONTH"); + + regexes.add(DATE); + regexIndex.add("DATE"); + + regexes.add(TIME); + regexIndex.add("TIME"); + + regexes.add(MONEY); + regexIndex.add("MONEY"); + + regexes.add(PHONE_NUMBER); + regexIndex.add("PHONE_NUMBER"); + + regexes.add(SHORT_NAME); + regexIndex.add("SHORT_NAME"); + + regexes.add(NUMBERS_EXPRESSION); + regexIndex.add("NUMBERS_EXPRESSION"); + + regexes.add(NUMBER); + regexIndex.add("NUMBER"); + + regexes.add(WORD_WITH_HYPHEN); + regexIndex.add("WORD_WITH_HYPHEN"); + + regexes.add(PUNCTUATION); + regexIndex.add("PUNCTUATION"); + + regexes.add(SPECIAL_CHAR); + regexIndex.add("SPECIAL_CHAR"); + + regexes.add(ALLCAP); + regexIndex.add("ALLCAP"); + + } + + return regexes; + } + + public static int getRegexIndex(String regex) + { + return regexIndex.indexOf(regex.toUpperCase()); + } + public static void main(String[] args) throws IOException { + List tokens = Tokenizer.tokenize("93% 9-10 anh-yeu-em"); + + for(String token : tokens) { + System.out.print(token + " "); + } + } +} \ No newline at end of file diff --git a/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/FWObject.java b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/FWObject.java new file mode 100644 index 0000000000000000000000000000000000000000..a34631f975989371a88553707207be64e3893d36 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/FWObject.java @@ -0,0 +1,30 @@ +package vn.corenlp.wordsegmenter; + +/** + * @author DatQuocNguyen + */ + +/* + * Define a 5-word/tag window object to capture the context surrounding a word + */ +public class FWObject { + private String[] context; + + public FWObject(boolean check) { + context = new String[10]; + if (check == true) { + for (int i = 0; i < 10; i += 2) 
{ + context[i] = ""; + context[i + 1] = ""; + } + } + } + + public String[] getContext() { + return context; + } + + public void setContext(String[] context) { + this.context = context; + } +} diff --git a/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Node.java b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Node.java new file mode 100644 index 0000000000000000000000000000000000000000..cb9be61cd00e37c7308b8cd4b048f11e66c08ff7 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Node.java @@ -0,0 +1,85 @@ +package vn.corenlp.wordsegmenter; + +/** + * @author DatQuocNguyen + */ + +public class Node { + private FWObject condition; + private String conclusion; + private Node exceptNode; + private Node ifnotNode; + private Node fatherNode; + private int depth; + + public Node(FWObject inCondition, String inConclusion, Node inFatherNode, Node inExceptNode, + Node inIfnotNode, int inDepth) { + this.condition = inCondition; + this.conclusion = inConclusion; + this.fatherNode = inFatherNode; + this.exceptNode = inExceptNode; + this.ifnotNode = inIfnotNode; + this.depth = inDepth; + } + + public void setIfnotNode(Node node) { + this.ifnotNode = node; + } + + public void setExceptNode(Node node) { + this.exceptNode = node; + } + + public void setFatherNode(Node node) { + this.fatherNode = node; + } + + public int countNodes() { + int count = 1; + if (exceptNode != null) { + count += exceptNode.countNodes(); + } + if (ifnotNode != null) { + count += ifnotNode.countNodes(); + } + return count; + } + + public boolean satisfy(FWObject object) { + boolean check = true; + for (int i = 0; i < 10; i++) { + String key = condition.getContext()[i]; + if (key != null) { + if (!key.equals(object.getContext()[i])) { + check = false; + break; + } + } + } + return check; + } + + public FWObject getCondition() { + return condition; + } + + public String getConclusion() { + return conclusion; + } + + public Node getExceptNode() { + return exceptNode; + } + + public Node getIfnotNode() { + return ifnotNode; + } + + public Node getFatherNode() { + return fatherNode; + } + + public int getDepth() { + return depth; + } +} diff --git a/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Utils.java b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Utils.java new file mode 100644 index 0000000000000000000000000000000000000000..194261e86fd46bb7c8f45bb7c019b1a081b814fe --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Utils.java @@ -0,0 +1,126 @@ +package vn.corenlp.wordsegmenter; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * @author DatQuocNguyen + * + */ +public class Utils +{ + public static FWObject getCondition(String strCondition) + { + FWObject condition = new FWObject(false); + + for (String rule : strCondition.split(" and ")) { + rule = rule.trim(); + String key = rule.substring(rule.indexOf(".") + 1, rule.indexOf(" ")); + String value = getConcreteValue(rule); + + if (key.equals("prevWord2")) { + condition.getContext()[4] = value; + } + else if (key.equals("prevTag2")) { + condition.getContext()[5] = value; + } + else if (key.equals("prevWord1")) { + condition.getContext()[2] = value; + } + else if (key.equals("prevTag1")) { + condition.getContext()[3] = value; + } + else if (key.equals("word")) { + condition.getContext()[1] = value; + } + else if (key.equals("tag")) { + condition.getContext()[0] = value; + } + else if (key.equals("nextWord1")) { + condition.getContext()[6] = value; + } + else if 
(key.equals("nextTag1")) { + condition.getContext()[7] = value; + } + else if (key.equals("nextWord2")) { + condition.getContext()[8] = value; + } + else if (key.equals("nextTag2")) { + condition.getContext()[9] = value; + } + } + + return condition; + } + + public static FWObject getObject(List wordtags, int size, int index) + { + FWObject object = new FWObject(true); + + if (index > 1) { + object.getContext()[4] = wordtags.get(index - 2).word; + object.getContext()[5] = wordtags.get(index - 2).tag; + } + + if (index > 0) { + object.getContext()[2] = wordtags.get(index - 1).word; + object.getContext()[3] = wordtags.get(index - 1).tag; + } + + String currentWord = wordtags.get(index).word; + String currentTag = wordtags.get(index).tag; + + object.getContext()[1] = currentWord; + object.getContext()[0] = currentTag; + + if (index < size - 1) { + object.getContext()[6] = wordtags.get(index + 1).word; + object.getContext()[7] = wordtags.get(index + 1).tag; + } + + if (index < size - 2) { + object.getContext()[8] = wordtags.get(index + 2).word; + object.getContext()[9] = wordtags.get(index + 2).tag; + } + + return object; + } + + public static String getConcreteValue(String str) + { + if (str.contains("\"\"")) { + if (str.contains("Word")) + return ""; + else + return ""; + } + String conclusion = str.substring(str.indexOf("\"") + 1, str.length() - 1); + return conclusion; + } + + public static Map NORMALIZER; + public static Set NORMALIZER_KEYS; + static { + NORMALIZER = new HashMap(); + NORMALIZER.put("òa", "oà"); + NORMALIZER.put("óa", "oá"); + NORMALIZER.put("ỏa", "oả"); + NORMALIZER.put("õa", "oã"); + NORMALIZER.put("ọa", "oạ"); + NORMALIZER.put("òe", "oè"); + NORMALIZER.put("óe", "oé"); + NORMALIZER.put("ỏe", "oẻ"); + NORMALIZER.put("õe", "oẽ"); + NORMALIZER.put("ọe", "oẹ"); + NORMALIZER.put("ùy", "uỳ"); + NORMALIZER.put("úy", "uý"); + NORMALIZER.put("ủy", "uỷ"); + NORMALIZER.put("ũy", "uỹ"); + NORMALIZER.put("ụy", "uỵ"); + NORMALIZER.put("Ủy", "Uỷ"); + NORMALIZER_KEYS = NORMALIZER.keySet(); + } + +} diff --git a/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java new file mode 100644 index 0000000000000000000000000000000000000000..aca365c43424907f0a7a2a069d68752f0f0d7782 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java @@ -0,0 +1,1605 @@ +package vn.corenlp.wordsegmenter; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.HashSet; +import java.util.Set; + +import vn.pipeline.Utils; + +@SuppressWarnings("unchecked") +public class Vocabulary { + public static Set VN_DICT; + static { + VN_DICT = new HashSet(); + try { + String vocabPath = Utils.jarDir + "/models/wordsegmenter/vi-vocab"; + if (!new File(vocabPath).exists()) + throw new IOException("Vocabulary: " + vocabPath + " is not found!"); + //Vocabulary.class.getClassLoader().getResource("wordsegmenter/vi-vocab").getPath() + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(vocabPath)); + VN_DICT = (Set) ois.readObject(); + ois.close(); + } + catch (IOException | ClassNotFoundException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } + // BufferedReader buffer; + // try { + // buffer = new BufferedReader(new InputStreamReader( + // new FileInputStream(new File("VnVocab.txt")), "UTF-8")); + // for (String line; (line = buffer.readLine()) != null;) { + // line = line.trim(); + // if (line.contains(" 
")) + // VN_DICT.add(line); + // } + // buffer.close(); + // } + // catch (FileNotFoundException e) { + // // TODO Auto-generated catch block + // e.printStackTrace(); + // } + // catch (IOException e) { + // // TODO Auto-generated catch block + // e.printStackTrace(); + // } + } + + public static Set COUNTRY_L_NAME; + static { + COUNTRY_L_NAME = new HashSet(); + COUNTRY_L_NAME.add("na uy"); + COUNTRY_L_NAME.add("san marino"); + COUNTRY_L_NAME.add("phần lan"); + COUNTRY_L_NAME.add("bồ đào nha"); + COUNTRY_L_NAME.add("ca-ri-bê hà lan"); + COUNTRY_L_NAME.add("quần đảo bắc mariana"); + COUNTRY_L_NAME.add("ả rập xê-út"); + COUNTRY_L_NAME.add("tây ban nha"); + COUNTRY_L_NAME.add("quần đảo virgin"); + COUNTRY_L_NAME.add("đảo somoa thuộc mỹ"); + COUNTRY_L_NAME.add("đông timor"); + COUNTRY_L_NAME.add("hoa kỳ"); + COUNTRY_L_NAME.add("quần đảo pitcairn"); + COUNTRY_L_NAME.add("samoa thuộc mỹ"); + COUNTRY_L_NAME.add("hàn quốc"); + COUNTRY_L_NAME.add("đảo ascension"); + COUNTRY_L_NAME.add("thuỵ sĩ"); + COUNTRY_L_NAME.add("ai cập"); + COUNTRY_L_NAME.add("burkina faso"); + COUNTRY_L_NAME.add("mông cổ"); + COUNTRY_L_NAME.add("polynesia thuộc pháp"); + COUNTRY_L_NAME.add("turks và caicos"); + COUNTRY_L_NAME.add("thổ nhĩ kỳ"); + COUNTRY_L_NAME.add("liên bang micronesia"); + COUNTRY_L_NAME.add("đảo man"); + COUNTRY_L_NAME.add("saint helena"); + COUNTRY_L_NAME.add("ả rập saudi"); + COUNTRY_L_NAME.add("ba lan"); + COUNTRY_L_NAME.add("são tomé và príncipe"); + COUNTRY_L_NAME.add("đảo norfolk"); + COUNTRY_L_NAME.add("chdcnd triều tiên"); + COUNTRY_L_NAME.add("quần đảo canary"); + COUNTRY_L_NAME.add("guiana thuộc pháp"); + COUNTRY_L_NAME.add("antigua và barbuda"); + COUNTRY_L_NAME.add("saint pierre và miquelon"); + COUNTRY_L_NAME.add("sri lanka"); + COUNTRY_L_NAME.add("ceuta và melilla"); + COUNTRY_L_NAME.add("việt nam"); + COUNTRY_L_NAME.add("bờ biển ngà"); + COUNTRY_L_NAME.add("thuỵ điển"); + COUNTRY_L_NAME.add("el salvador"); + COUNTRY_L_NAME.add("svalbard và jan mayen"); + COUNTRY_L_NAME.add("saint lucia"); + COUNTRY_L_NAME.add("diego garcia"); + COUNTRY_L_NAME.add("ấn độ"); + COUNTRY_L_NAME.add("tây sahara"); + COUNTRY_L_NAME.add("quần đảo cook"); + COUNTRY_L_NAME.add("guinea xích đạo"); + COUNTRY_L_NAME.add("trung quốc"); + COUNTRY_L_NAME.add("chdc congo"); + COUNTRY_L_NAME.add("cộng hoà dominica"); + COUNTRY_L_NAME.add("cape verde"); + COUNTRY_L_NAME.add("hà lan"); + COUNTRY_L_NAME.add("puerto rico"); + COUNTRY_L_NAME.add("đài loan"); + COUNTRY_L_NAME.add("cộng hoà séc"); + COUNTRY_L_NAME.add("costa rica"); + COUNTRY_L_NAME.add("saint kitts và nevis"); + COUNTRY_L_NAME.add("nhật bản"); + COUNTRY_L_NAME.add("quần đảo faroe"); + COUNTRY_L_NAME.add("đan mạch"); + COUNTRY_L_NAME.add("turk và caicos"); + COUNTRY_L_NAME.add("cabo verde"); + COUNTRY_L_NAME.add("nam sudan"); + COUNTRY_L_NAME.add("cộng hoà trung phi"); + COUNTRY_L_NAME.add("trung phi"); + COUNTRY_L_NAME.add("saint vincent và grenadines"); + COUNTRY_L_NAME.add("quần đảo cocos"); + COUNTRY_L_NAME.add("thành vatican"); + COUNTRY_L_NAME.add("saint barthélemy"); + COUNTRY_L_NAME.add("nam cực"); + COUNTRY_L_NAME.add("trinidad và tobago"); + COUNTRY_L_NAME.add("cộng hoà congo"); + COUNTRY_L_NAME.add("quần đảo cayman"); + COUNTRY_L_NAME.add("saint martin"); + COUNTRY_L_NAME.add("tristan da cunha"); + COUNTRY_L_NAME.add("bosnia và herzegovina"); + COUNTRY_L_NAME.add("thái lan"); + COUNTRY_L_NAME.add("new zealand"); + COUNTRY_L_NAME.add("hồng kông"); + COUNTRY_L_NAME.add("wallis và futuna"); + COUNTRY_L_NAME.add("sierra leone"); + COUNTRY_L_NAME.add("sint 
maarten"); + COUNTRY_L_NAME.add("quần đảo solomon"); + COUNTRY_L_NAME.add("nam phi"); + COUNTRY_L_NAME.add("bosna và hercegovina"); + COUNTRY_L_NAME.add("vương quốc anh"); + COUNTRY_L_NAME.add("papua new guinea"); + COUNTRY_L_NAME.add("hy lạp"); + COUNTRY_L_NAME.add("đảo giáng sinh"); + COUNTRY_L_NAME.add("triều tiên"); + COUNTRY_L_NAME.add("quần đảo falkland"); + COUNTRY_L_NAME.add("miến điện"); + COUNTRY_L_NAME.add("quần đảo marshall"); + COUNTRY_L_NAME.add("new caledonia"); + } + + public static Set COUNTRY_S_NAME; + static { + COUNTRY_S_NAME = new HashSet(); + COUNTRY_S_NAME.add("mỹ"); + COUNTRY_S_NAME.add("belarus"); + COUNTRY_S_NAME.add("guinée"); + COUNTRY_S_NAME.add("gambia"); + COUNTRY_S_NAME.add("cô-oét"); + COUNTRY_S_NAME.add("guinea"); + COUNTRY_S_NAME.add("estonia"); + COUNTRY_S_NAME.add("philippines"); + COUNTRY_S_NAME.add("cuba"); + COUNTRY_S_NAME.add("mauritius"); + COUNTRY_S_NAME.add("mali"); + COUNTRY_S_NAME.add("armenia"); + COUNTRY_S_NAME.add("aruba"); + COUNTRY_S_NAME.add("méxico"); + COUNTRY_S_NAME.add("ukraina"); + COUNTRY_S_NAME.add("bénin"); + COUNTRY_S_NAME.add("congo"); + COUNTRY_S_NAME.add("monaco"); + COUNTRY_S_NAME.add("séc"); + COUNTRY_S_NAME.add("kenya"); + COUNTRY_S_NAME.add("hungary"); + COUNTRY_S_NAME.add("greenland"); + COUNTRY_S_NAME.add("li-băng"); + COUNTRY_S_NAME.add("paraguay"); + COUNTRY_S_NAME.add("palau"); + COUNTRY_S_NAME.add("vanuatu"); + COUNTRY_S_NAME.add("colombia"); + COUNTRY_S_NAME.add("azerbaijan"); + COUNTRY_S_NAME.add("syria"); + COUNTRY_S_NAME.add("rwanda"); + COUNTRY_S_NAME.add("libya"); + COUNTRY_S_NAME.add("guernsey"); + COUNTRY_S_NAME.add("afghanistan"); + COUNTRY_S_NAME.add("guiné-bissau"); + COUNTRY_S_NAME.add("hungari"); + COUNTRY_S_NAME.add("kiribati"); + COUNTRY_S_NAME.add("dominica"); + COUNTRY_S_NAME.add("bulgaria"); + COUNTRY_S_NAME.add("brasil"); + COUNTRY_S_NAME.add("bahrain"); + COUNTRY_S_NAME.add("guatemala"); + COUNTRY_S_NAME.add("ghana"); + COUNTRY_S_NAME.add("somalia"); + COUNTRY_S_NAME.add("jamaica"); + COUNTRY_S_NAME.add("togo"); + COUNTRY_S_NAME.add("liechtenstein"); + COUNTRY_S_NAME.add("serbia"); + COUNTRY_S_NAME.add("ma-rốc"); + COUNTRY_S_NAME.add("bỉ"); + COUNTRY_S_NAME.add("úc"); + COUNTRY_S_NAME.add("senegal"); + COUNTRY_S_NAME.add("montserrat"); + COUNTRY_S_NAME.add("zambia"); + COUNTRY_S_NAME.add("namibia"); + COUNTRY_S_NAME.add("comoros"); + COUNTRY_S_NAME.add("curaçao"); + COUNTRY_S_NAME.add("palestine"); + COUNTRY_S_NAME.add("canada"); + COUNTRY_S_NAME.add("li-bi"); + COUNTRY_S_NAME.add("honduras"); + COUNTRY_S_NAME.add("réunion"); + COUNTRY_S_NAME.add("maldives"); + COUNTRY_S_NAME.add("chile"); + COUNTRY_S_NAME.add("algérie"); + COUNTRY_S_NAME.add("oman"); + COUNTRY_S_NAME.add("timor-leste"); + COUNTRY_S_NAME.add("brazil"); + COUNTRY_S_NAME.add("lesotho"); + COUNTRY_S_NAME.add("guyana"); + COUNTRY_S_NAME.add("peru"); + COUNTRY_S_NAME.add("malaysia"); + COUNTRY_S_NAME.add("jersey"); + COUNTRY_S_NAME.add("síp"); + COUNTRY_S_NAME.add("belize"); + COUNTRY_S_NAME.add("nauru"); + COUNTRY_S_NAME.add("campuchia"); + COUNTRY_S_NAME.add("kuwait"); + COUNTRY_S_NAME.add("slovenia"); + COUNTRY_S_NAME.add("somali"); + COUNTRY_S_NAME.add("haiti"); + COUNTRY_S_NAME.add("zimbabwe"); + COUNTRY_S_NAME.add("macedonia"); + COUNTRY_S_NAME.add("micronesia"); + COUNTRY_S_NAME.add("philippin"); + COUNTRY_S_NAME.add("bolivia"); + COUNTRY_S_NAME.add("brunei"); + COUNTRY_S_NAME.add("israel"); + COUNTRY_S_NAME.add("lào"); + COUNTRY_S_NAME.add("bangladesh"); + COUNTRY_S_NAME.add("ý"); + COUNTRY_S_NAME.add("ireland"); + 
COUNTRY_S_NAME.add("albania"); + COUNTRY_S_NAME.add("botswana"); + COUNTRY_S_NAME.add("venezuela"); + COUNTRY_S_NAME.add("andorra"); + COUNTRY_S_NAME.add("malawi"); + COUNTRY_S_NAME.add("moldova"); + COUNTRY_S_NAME.add("madagascar"); + COUNTRY_S_NAME.add("turkmenistan"); + COUNTRY_S_NAME.add("iran"); + COUNTRY_S_NAME.add("iraq"); + COUNTRY_S_NAME.add("seychelles"); + COUNTRY_S_NAME.add("indonesia"); + COUNTRY_S_NAME.add("tchad"); + COUNTRY_S_NAME.add("nicaragua"); + COUNTRY_S_NAME.add("gibraltar"); + COUNTRY_S_NAME.add("ethiopia"); + COUNTRY_S_NAME.add("ecuador"); + COUNTRY_S_NAME.add("guinea-bissau"); + COUNTRY_S_NAME.add("mauritania"); + COUNTRY_S_NAME.add("albani"); + COUNTRY_S_NAME.add("algeria"); + COUNTRY_S_NAME.add("mozambique"); + COUNTRY_S_NAME.add("cameroon"); + COUNTRY_S_NAME.add("vatican"); + COUNTRY_S_NAME.add("liban"); + COUNTRY_S_NAME.add("panama"); + COUNTRY_S_NAME.add("uae"); + COUNTRY_S_NAME.add("luxembourg"); + COUNTRY_S_NAME.add("nigeria"); + COUNTRY_S_NAME.add("sudan"); + COUNTRY_S_NAME.add("benin"); + COUNTRY_S_NAME.add("chad"); + COUNTRY_S_NAME.add("liberia"); + COUNTRY_S_NAME.add("djibouti"); + COUNTRY_S_NAME.add("đức"); + COUNTRY_S_NAME.add("tajikistan"); + COUNTRY_S_NAME.add("fiji"); + COUNTRY_S_NAME.add("singapore"); + COUNTRY_S_NAME.add("mexico"); + COUNTRY_S_NAME.add("samoa"); + COUNTRY_S_NAME.add("tunisia"); + COUNTRY_S_NAME.add("bahamas"); + COUNTRY_S_NAME.add("bhutan"); + COUNTRY_S_NAME.add("uganda"); + COUNTRY_S_NAME.add("uruguay"); + COUNTRY_S_NAME.add("gabon"); + COUNTRY_S_NAME.add("bungari"); + COUNTRY_S_NAME.add("niger"); + COUNTRY_S_NAME.add("kyrgyzstan"); + COUNTRY_S_NAME.add("pakistan"); + COUNTRY_S_NAME.add("martinique"); + COUNTRY_S_NAME.add("macao"); + COUNTRY_S_NAME.add("kosovo"); + COUNTRY_S_NAME.add("mayotte"); + COUNTRY_S_NAME.add("yemen"); + COUNTRY_S_NAME.add("georgia"); + COUNTRY_S_NAME.add("pháp"); + COUNTRY_S_NAME.add("ai-len"); + COUNTRY_S_NAME.add("argentina"); + COUNTRY_S_NAME.add("jordan"); + COUNTRY_S_NAME.add("anguilla"); + COUNTRY_S_NAME.add("swaziland"); + COUNTRY_S_NAME.add("burundi"); + COUNTRY_S_NAME.add("slovakia"); + COUNTRY_S_NAME.add("uzbekistan"); + COUNTRY_S_NAME.add("maroc"); + COUNTRY_S_NAME.add("tanzania"); + COUNTRY_S_NAME.add("litva"); + COUNTRY_S_NAME.add("grenada"); + COUNTRY_S_NAME.add("gruzia"); + COUNTRY_S_NAME.add("lít-va"); + COUNTRY_S_NAME.add("guam"); + COUNTRY_S_NAME.add("eritrea"); + COUNTRY_S_NAME.add("áo"); + COUNTRY_S_NAME.add("croatia"); + COUNTRY_S_NAME.add("niue"); + COUNTRY_S_NAME.add("nepal"); + COUNTRY_S_NAME.add("tokelau"); + COUNTRY_S_NAME.add("bermuda"); + COUNTRY_S_NAME.add("i-rắc"); + COUNTRY_S_NAME.add("suriname"); + COUNTRY_S_NAME.add("guadeloupe"); + COUNTRY_S_NAME.add("nga"); + COUNTRY_S_NAME.add("romania"); + COUNTRY_S_NAME.add("angola"); + COUNTRY_S_NAME.add("latvia"); + COUNTRY_S_NAME.add("kazakhstan"); + COUNTRY_S_NAME.add("malta"); + COUNTRY_S_NAME.add("myanmar"); + COUNTRY_S_NAME.add("iceland"); + COUNTRY_S_NAME.add("românia"); + COUNTRY_S_NAME.add("montenegro"); + COUNTRY_S_NAME.add("macau"); + COUNTRY_S_NAME.add("tuvalu"); + COUNTRY_S_NAME.add("qatar"); + COUNTRY_S_NAME.add("tonga"); + COUNTRY_S_NAME.add("barbados"); + } + + public static Set WORLD_COMPANY; + static { + WORLD_COMPANY = new HashSet(); + WORLD_COMPANY.add("verizon"); + WORLD_COMPANY.add("prada"); + WORLD_COMPANY.add("hp"); + WORLD_COMPANY.add("walmart"); + WORLD_COMPANY.add("adidas"); + WORLD_COMPANY.add("mastercard"); + WORLD_COMPANY.add("digg"); + WORLD_COMPANY.add("canon"); + WORLD_COMPANY.add("ikea"); + 
WORLD_COMPANY.add("sony"); + WORLD_COMPANY.add("twitter"); + WORLD_COMPANY.add("lego"); + WORLD_COMPANY.add("toshiba"); + WORLD_COMPANY.add("nokia"); + WORLD_COMPANY.add("bbc"); + WORLD_COMPANY.add("vmware"); + WORLD_COMPANY.add("mercedes-benz"); + WORLD_COMPANY.add("google"); + WORLD_COMPANY.add("intel"); + WORLD_COMPANY.add("iphone"); + WORLD_COMPANY.add("rbc"); + WORLD_COMPANY.add("fedex"); + WORLD_COMPANY.add("mercedes"); + WORLD_COMPANY.add("gillette"); + WORLD_COMPANY.add("ups"); + WORLD_COMPANY.add("carrefour"); + WORLD_COMPANY.add("lenovo"); + WORLD_COMPANY.add("loreal"); + WORLD_COMPANY.add("mcdonald"); + WORLD_COMPANY.add("coca-cola"); + WORLD_COMPANY.add("guardian"); + WORLD_COMPANY.add("cisco"); + WORLD_COMPANY.add("paypal"); + WORLD_COMPANY.add("cvs"); + WORLD_COMPANY.add("acer"); + WORLD_COMPANY.add("cnn"); + WORLD_COMPANY.add("nike"); + WORLD_COMPANY.add("facebook"); + WORLD_COMPANY.add("spotify"); + WORLD_COMPANY.add("adobe"); + WORLD_COMPANY.add("kfc"); + WORLD_COMPANY.add("westpac"); + WORLD_COMPANY.add("subway"); + WORLD_COMPANY.add("ibm"); + WORLD_COMPANY.add("panasonic"); + WORLD_COMPANY.add("visa"); + WORLD_COMPANY.add("motorola"); + WORLD_COMPANY.add("nissan"); + WORLD_COMPANY.add("citibank"); + WORLD_COMPANY.add("baidu"); + WORLD_COMPANY.add("ford"); + WORLD_COMPANY.add("microsoft"); + WORLD_COMPANY.add("bmw"); + WORLD_COMPANY.add("foxconn"); + WORLD_COMPANY.add("yahoo"); + WORLD_COMPANY.add("hermes"); + WORLD_COMPANY.add("oracle"); + WORLD_COMPANY.add("mcdonalds"); + WORLD_COMPANY.add("tencent"); + WORLD_COMPANY.add("mtv"); + WORLD_COMPANY.add("zara"); + WORLD_COMPANY.add("amazon"); + WORLD_COMPANY.add("toyota"); + WORLD_COMPANY.add("gucci"); + WORLD_COMPANY.add("ebay"); + WORLD_COMPANY.add("kodak"); + WORLD_COMPANY.add("youtube"); + WORLD_COMPANY.add("android"); + WORLD_COMPANY.add("linkedin"); + WORLD_COMPANY.add("myspace"); + WORLD_COMPANY.add("t-mobile"); + WORLD_COMPANY.add("apple"); + WORLD_COMPANY.add("samsung"); + WORLD_COMPANY.add("aldi"); + WORLD_COMPANY.add("colgate"); + WORLD_COMPANY.add("starbucks"); + WORLD_COMPANY.add("pepsi"); + WORLD_COMPANY.add("honda"); + WORLD_COMPANY.add("dell"); + WORLD_COMPANY.add("hitachi"); + WORLD_COMPANY.add("blackberry"); + WORLD_COMPANY.add("disney"); + WORLD_COMPANY.add("siemens"); + WORLD_COMPANY.add("vodafone"); + } + + public static Set VN_LOCATIONS; + static { + VN_LOCATIONS = new HashSet(); + VN_LOCATIONS.add("mỹ tho"); + VN_LOCATIONS.add("tập cận bình"); + VN_LOCATIONS.add("nam đông"); + VN_LOCATIONS.add("kiên lương"); + VN_LOCATIONS.add("lương sơn"); + VN_LOCATIONS.add("gò vấp"); + VN_LOCATIONS.add("quang bình"); + VN_LOCATIONS.add("ia pa"); + VN_LOCATIONS.add("lạc sơn"); + VN_LOCATIONS.add("chí linh"); + VN_LOCATIONS.add("ninh hải"); + VN_LOCATIONS.add("sơn dương"); + VN_LOCATIONS.add("quan sơn"); + VN_LOCATIONS.add("ứng hoà"); + VN_LOCATIONS.add("krông pắk"); + VN_LOCATIONS.add("tân hưng"); + VN_LOCATIONS.add("nghệ an"); + VN_LOCATIONS.add("tân thạnh"); + VN_LOCATIONS.add("yên định"); + VN_LOCATIONS.add("mường nhé"); + VN_LOCATIONS.add("ngô quyền"); + VN_LOCATIONS.add("hàm thuận bắc"); + VN_LOCATIONS.add("phú tân"); + VN_LOCATIONS.add("tân hồng"); + VN_LOCATIONS.add("trà ôn"); + VN_LOCATIONS.add("từ liêm"); + VN_LOCATIONS.add("bình thuận"); + VN_LOCATIONS.add("an phú"); + VN_LOCATIONS.add("duy xuyên"); + VN_LOCATIONS.add("nam trực"); + VN_LOCATIONS.add("phù cừ"); + VN_LOCATIONS.add("mai sơn"); + VN_LOCATIONS.add("thạnh phú"); + VN_LOCATIONS.add("lộc bình"); + VN_LOCATIONS.add("kim thành"); + 
VN_LOCATIONS.add("cái bè"); + VN_LOCATIONS.add("hà quảng"); + VN_LOCATIONS.add("long thành"); + VN_LOCATIONS.add("đồng phù"); + VN_LOCATIONS.add("bảo yên"); + VN_LOCATIONS.add("chiêm hoá"); + VN_LOCATIONS.add("gia nghĩa"); + VN_LOCATIONS.add("an dương"); + VN_LOCATIONS.add("phú quý"); + VN_LOCATIONS.add("quảng trạch"); + VN_LOCATIONS.add("trường sa"); + VN_LOCATIONS.add("hoàn kiếm"); + VN_LOCATIONS.add("thủ thừa"); + VN_LOCATIONS.add("hải lăng"); + VN_LOCATIONS.add("pleiku"); + VN_LOCATIONS.add("thanh hoá"); + VN_LOCATIONS.add("bạch thông"); + VN_LOCATIONS.add("vĩnh phúc"); + VN_LOCATIONS.add("vãn lãng"); + VN_LOCATIONS.add("bình gia"); + VN_LOCATIONS.add("sa thầy"); + VN_LOCATIONS.add("triệu sơn"); + VN_LOCATIONS.add("yên thuỷ"); + VN_LOCATIONS.add("văn giang"); + VN_LOCATIONS.add("hồ chí minh"); + VN_LOCATIONS.add("nga sơn"); + VN_LOCATIONS.add("gia lâm"); + VN_LOCATIONS.add("vị thanh"); + VN_LOCATIONS.add("cái răng"); + VN_LOCATIONS.add("cao bằng"); + VN_LOCATIONS.add("hoài ân"); + VN_LOCATIONS.add("vĩnh long"); + VN_LOCATIONS.add("kim động"); + VN_LOCATIONS.add("ngân sơn"); + VN_LOCATIONS.add("lấp vò"); + VN_LOCATIONS.add("sông công"); + VN_LOCATIONS.add("hoài nhơn"); + VN_LOCATIONS.add("kim bôi"); + VN_LOCATIONS.add("bắc ninh"); + VN_LOCATIONS.add("thái nguyên"); + VN_LOCATIONS.add("đơn dương"); + VN_LOCATIONS.add("định quán"); + VN_LOCATIONS.add("gò công"); + VN_LOCATIONS.add("hà giang"); + VN_LOCATIONS.add("hoà bình"); + VN_LOCATIONS.add("mèo vạc"); + VN_LOCATIONS.add("mộc châu"); + VN_LOCATIONS.add("quảng ngãi"); + VN_LOCATIONS.add("cẩm giàng"); + VN_LOCATIONS.add("sông hinh"); + VN_LOCATIONS.add("thới bình"); + VN_LOCATIONS.add("phụng hiệp"); + VN_LOCATIONS.add("ninh hoà"); + VN_LOCATIONS.add("hậu giang"); + VN_LOCATIONS.add("cái nước"); + VN_LOCATIONS.add("ô môn"); + VN_LOCATIONS.add("gia lai"); + VN_LOCATIONS.add("phổ yên"); + VN_LOCATIONS.add("quế sơn"); + VN_LOCATIONS.add("yên thành"); + VN_LOCATIONS.add("tiên du"); + VN_LOCATIONS.add("an minh"); + VN_LOCATIONS.add("chợ lách"); + VN_LOCATIONS.add("phú ninh"); + VN_LOCATIONS.add("tủa chùa"); + VN_LOCATIONS.add("hương trà"); + VN_LOCATIONS.add("thăng bình"); + VN_LOCATIONS.add("vĩnh thuận"); + VN_LOCATIONS.add("hà tĩnh"); + VN_LOCATIONS.add("lâm đồng"); + VN_LOCATIONS.add("phú quốc"); + VN_LOCATIONS.add("long mỹ"); + VN_LOCATIONS.add("long an"); + VN_LOCATIONS.add("bình lục"); + VN_LOCATIONS.add("vĩnh thạnh"); + VN_LOCATIONS.add("đống đa"); + VN_LOCATIONS.add("hạ long"); + VN_LOCATIONS.add("kỳ sơn"); + VN_LOCATIONS.add("đăk song"); + VN_LOCATIONS.add("lai vung"); + VN_LOCATIONS.add("ý yên"); + VN_LOCATIONS.add("xuyên mộc"); + VN_LOCATIONS.add("vị xuyên"); + VN_LOCATIONS.add("duy tiên"); + VN_LOCATIONS.add("khánh sơn"); + VN_LOCATIONS.add("bỉm sơn"); + VN_LOCATIONS.add("hiệp đức"); + VN_LOCATIONS.add("kim sơn"); + VN_LOCATIONS.add("xín mần"); + VN_LOCATIONS.add("hương thuỷ"); + VN_LOCATIONS.add("tuy hoà"); + VN_LOCATIONS.add("u minh"); + VN_LOCATIONS.add("thiệu hoá"); + VN_LOCATIONS.add("bù đốp"); + VN_LOCATIONS.add("yên sơn"); + VN_LOCATIONS.add("quảng xương"); + VN_LOCATIONS.add("cần đước"); + VN_LOCATIONS.add("thuỷ nguyên"); + VN_LOCATIONS.add("yên dũng"); + VN_LOCATIONS.add("yên hưng"); + VN_LOCATIONS.add("bắc mê"); + VN_LOCATIONS.add("thọ xuân"); + VN_LOCATIONS.add("móng cái"); + VN_LOCATIONS.add("lạc dương"); + VN_LOCATIONS.add("cẩm xuyên"); + VN_LOCATIONS.add("lâm thao"); + VN_LOCATIONS.add("bình tân"); + VN_LOCATIONS.add("phúc yên"); + VN_LOCATIONS.add("sơn tây"); + VN_LOCATIONS.add("vĩnh châu"); + VN_LOCATIONS.add("na 
hang"); + VN_LOCATIONS.add("chương mỹ"); + VN_LOCATIONS.add("bảo lộc"); + VN_LOCATIONS.add("nghi xuân"); + VN_LOCATIONS.add("lương tài"); + VN_LOCATIONS.add("thoại sơn"); + VN_LOCATIONS.add("cửa lò"); + VN_LOCATIONS.add("đông hưng"); + VN_LOCATIONS.add("lập thạch"); + VN_LOCATIONS.add("nam định"); + VN_LOCATIONS.add("quảng nam"); + VN_LOCATIONS.add("kiên hải"); + VN_LOCATIONS.add("đồng xuân"); + VN_LOCATIONS.add("phú xuyên"); + VN_LOCATIONS.add("tiểu cần"); + VN_LOCATIONS.add("phúc thọ"); + VN_LOCATIONS.add("đông giang"); + VN_LOCATIONS.add("gò dầu"); + VN_LOCATIONS.add("giá rai"); + VN_LOCATIONS.add("tây sơn"); + VN_LOCATIONS.add("phú hoà"); + VN_LOCATIONS.add("việt yên"); + VN_LOCATIONS.add("đak đoa"); + VN_LOCATIONS.add("mường la"); + VN_LOCATIONS.add("hồng ngự"); + VN_LOCATIONS.add("bắc bình"); + VN_LOCATIONS.add("phủ lý"); + VN_LOCATIONS.add("gio linh"); + VN_LOCATIONS.add("cồn cỏ"); + VN_LOCATIONS.add("đức linh"); + VN_LOCATIONS.add("củ chi"); + VN_LOCATIONS.add("hương sơn"); + VN_LOCATIONS.add("tịnh biên"); + VN_LOCATIONS.add("bình thuỷ"); + VN_LOCATIONS.add("nhà bè"); + VN_LOCATIONS.add("yên thế"); + VN_LOCATIONS.add("vĩnh tường"); + VN_LOCATIONS.add("kế sách"); + VN_LOCATIONS.add("sóc sơn"); + VN_LOCATIONS.add("chợ đồn"); + VN_LOCATIONS.add("châu phú"); + VN_LOCATIONS.add("kiến an"); + VN_LOCATIONS.add("sốp cộp"); + VN_LOCATIONS.add("lệ thuỷ"); + VN_LOCATIONS.add("sơn tịnh"); + VN_LOCATIONS.add("càng long"); + VN_LOCATIONS.add("vị thuỷ"); + VN_LOCATIONS.add("ea súp"); + VN_LOCATIONS.add("quảng điền"); + VN_LOCATIONS.add("nghĩa lộ"); + VN_LOCATIONS.add("đồ sơn"); + VN_LOCATIONS.add("krông pa"); + VN_LOCATIONS.add("việt trì"); + VN_LOCATIONS.add("tân thành"); + VN_LOCATIONS.add("nghĩa hưng"); + VN_LOCATIONS.add("bạc liêu"); + VN_LOCATIONS.add("hưng yên"); + VN_LOCATIONS.add("hoàng mai"); + VN_LOCATIONS.add("diên khánh"); + VN_LOCATIONS.add("lăk"); + VN_LOCATIONS.add("bắc trà my"); + VN_LOCATIONS.add("tân châu"); + VN_LOCATIONS.add("tân phú"); + VN_LOCATIONS.add("bình long"); + VN_LOCATIONS.add("đông hà"); + VN_LOCATIONS.add("kon plông"); + VN_LOCATIONS.add("sa đéc"); + VN_LOCATIONS.add("an lão"); + VN_LOCATIONS.add("như xuân"); + VN_LOCATIONS.add("bến lức"); + VN_LOCATIONS.add("thanh khê"); + VN_LOCATIONS.add("long xuyên"); + VN_LOCATIONS.add("chợ gạo"); + VN_LOCATIONS.add("lục nam"); + VN_LOCATIONS.add("hoà thành"); + VN_LOCATIONS.add("vũng liêm"); + VN_LOCATIONS.add("bình định"); + VN_LOCATIONS.add("cẩm mỹ"); + VN_LOCATIONS.add("mộc hoá"); + VN_LOCATIONS.add("tánh linh"); + VN_LOCATIONS.add("đất đỏ"); + VN_LOCATIONS.add("quế võ"); + VN_LOCATIONS.add("trấn yên"); + VN_LOCATIONS.add("cầu ngang"); + VN_LOCATIONS.add("lai châu"); + VN_LOCATIONS.add("gò công tây"); + VN_LOCATIONS.add("lý nhân"); + VN_LOCATIONS.add("bà rịa-vũng tàu"); + VN_LOCATIONS.add("bình giang"); + VN_LOCATIONS.add("mường khương"); + VN_LOCATIONS.add("gò quao"); + VN_LOCATIONS.add("bình đại"); + VN_LOCATIONS.add("điện bàn"); + VN_LOCATIONS.add("hải châu"); + VN_LOCATIONS.add("bắc giang"); + VN_LOCATIONS.add("văn lâm"); + VN_LOCATIONS.add("ninh thuận"); + VN_LOCATIONS.add("cô tô"); + VN_LOCATIONS.add("quảng uyên"); + VN_LOCATIONS.add("đông hải"); + VN_LOCATIONS.add("phan thiết"); + VN_LOCATIONS.add("tĩnh gia"); + VN_LOCATIONS.add("bạch long vĩ"); + VN_LOCATIONS.add("hoài đức"); + VN_LOCATIONS.add("la gi"); + VN_LOCATIONS.add("ngọc hồi"); + VN_LOCATIONS.add("bình sơn"); + VN_LOCATIONS.add("dương minh châu"); + VN_LOCATIONS.add("can lộc"); + VN_LOCATIONS.add("hồng bàng"); + VN_LOCATIONS.add("thanh miện"); + 
VN_LOCATIONS.add("trảng bàng"); + VN_LOCATIONS.add("thái bình"); + VN_LOCATIONS.add("hải dương"); + VN_LOCATIONS.add("hà tây"); + VN_LOCATIONS.add("krông nô"); + VN_LOCATIONS.add("tam đường"); + VN_LOCATIONS.add("nguyên bình"); + VN_LOCATIONS.add("thủ dầu một"); + VN_LOCATIONS.add("vĩnh lộc"); + VN_LOCATIONS.add("đăk r'lấp"); + VN_LOCATIONS.add("hai bà trưng"); + VN_LOCATIONS.add("long khánh"); + VN_LOCATIONS.add("bình liêu"); + VN_LOCATIONS.add("đồng hỷ"); + VN_LOCATIONS.add("võ nhai"); + VN_LOCATIONS.add("lạc thuỷ"); + VN_LOCATIONS.add("quỳnh phụ"); + VN_LOCATIONS.add("diễn châu"); + VN_LOCATIONS.add("cầu giấy"); + VN_LOCATIONS.add("sơn la"); + VN_LOCATIONS.add("sông mã"); + VN_LOCATIONS.add("kinh môn"); + VN_LOCATIONS.add("thạch thành"); + VN_LOCATIONS.add("ea kar"); + VN_LOCATIONS.add("krông búk"); + VN_LOCATIONS.add("gò công đông"); + VN_LOCATIONS.add("phù ninh"); + VN_LOCATIONS.add("sơn hà"); + VN_LOCATIONS.add("đạ tẻh"); + VN_LOCATIONS.add("mộ đức"); + VN_LOCATIONS.add("cờ đỏ"); + VN_LOCATIONS.add("hương khê"); + VN_LOCATIONS.add("phú lương"); + VN_LOCATIONS.add("di linh"); + VN_LOCATIONS.add("phú vang"); + VN_LOCATIONS.add("lạng giang"); + VN_LOCATIONS.add("yên mô"); + VN_LOCATIONS.add("giao thuỷ"); + VN_LOCATIONS.add("quốc oai"); + VN_LOCATIONS.add("tuyên quang"); + VN_LOCATIONS.add("bát xát"); + VN_LOCATIONS.add("bắc hà"); + VN_LOCATIONS.add("đắk lắk"); + VN_LOCATIONS.add("tiên phước"); + VN_LOCATIONS.add("lê chân"); + VN_LOCATIONS.add("tiên yên"); + VN_LOCATIONS.add("bến cát"); + VN_LOCATIONS.add("tây giang"); + VN_LOCATIONS.add("đà nẵng"); + VN_LOCATIONS.add("ia grai"); + VN_LOCATIONS.add("tam bình"); + VN_LOCATIONS.add("thường tín"); + VN_LOCATIONS.add("vĩnh bảo"); + VN_LOCATIONS.add("hướng hoá"); + VN_LOCATIONS.add("sơn trà"); + VN_LOCATIONS.add("tân uyên"); + VN_LOCATIONS.add("m'đrăk"); + VN_LOCATIONS.add("quản bạ"); + VN_LOCATIONS.add("liên chiểu"); + VN_LOCATIONS.add("tri tôn"); + VN_LOCATIONS.add("tiên lãng"); + VN_LOCATIONS.add("biên hoà"); + VN_LOCATIONS.add("hải hậu"); + VN_LOCATIONS.add("tây ninh"); + VN_LOCATIONS.add("quỳnh nhai"); + VN_LOCATIONS.add("thạch hà"); + VN_LOCATIONS.add("đồng nai"); + VN_LOCATIONS.add("tuyên hoá"); + VN_LOCATIONS.add("mai châu"); + VN_LOCATIONS.add("yên bái"); + VN_LOCATIONS.add("duyên hải"); + VN_LOCATIONS.add("tháp mười"); + VN_LOCATIONS.add("phú nhuận"); + VN_LOCATIONS.add("ân thi"); + VN_LOCATIONS.add("khoái châu"); + VN_LOCATIONS.add("hòn đất"); + VN_LOCATIONS.add("thống nhất"); + VN_LOCATIONS.add("nghĩa đàn"); + VN_LOCATIONS.add("quế phong"); + VN_LOCATIONS.add("thủ đức"); + VN_LOCATIONS.add("hạ lang"); + VN_LOCATIONS.add("vĩnh linh"); + VN_LOCATIONS.add("yên lạc"); + VN_LOCATIONS.add("triệu phong"); + VN_LOCATIONS.add("lâm hà"); + VN_LOCATIONS.add("bảo lâm"); + VN_LOCATIONS.add("hải phòng"); + VN_LOCATIONS.add("vũ quang"); + VN_LOCATIONS.add("cao lộc"); + VN_LOCATIONS.add("nhơn trạch"); + VN_LOCATIONS.add("quảng trị"); + VN_LOCATIONS.add("thạch thất"); + VN_LOCATIONS.add("chơn thành"); + VN_LOCATIONS.add("tân yên"); + VN_LOCATIONS.add("thanh hà"); + VN_LOCATIONS.add("thạnh hoá"); + VN_LOCATIONS.add("si ma cai"); + VN_LOCATIONS.add("bác ái"); + VN_LOCATIONS.add("đăk hà"); + VN_LOCATIONS.add("yên minh"); + VN_LOCATIONS.add("tân bình"); + VN_LOCATIONS.add("đại từ"); + VN_LOCATIONS.add("phục hoà"); + VN_LOCATIONS.add("ninh sơn"); + VN_LOCATIONS.add("long phú"); + VN_LOCATIONS.add("hà tiên"); + VN_LOCATIONS.add("thanh bình"); + VN_LOCATIONS.add("mỏ cày"); + VN_LOCATIONS.add("thạnh trị"); + VN_LOCATIONS.add("trà vinh"); + 
VN_LOCATIONS.add("dầu tiếng"); + VN_LOCATIONS.add("bắc kạn"); + VN_LOCATIONS.add("chư sê"); + VN_LOCATIONS.add("thanh trì"); + VN_LOCATIONS.add("ngọc lạc"); + VN_LOCATIONS.add("từ sơn"); + VN_LOCATIONS.add("gia bình"); + VN_LOCATIONS.add("pác nặm"); + VN_LOCATIONS.add("thốt nốt"); + VN_LOCATIONS.add("trà bồng"); + VN_LOCATIONS.add("thừa thiên-huế"); + VN_LOCATIONS.add("phước long"); + VN_LOCATIONS.add("cẩm phả"); + VN_LOCATIONS.add("kon rẫy"); + VN_LOCATIONS.add("long biên"); + VN_LOCATIONS.add("cư m'gar"); + VN_LOCATIONS.add("cao lãnh"); + VN_LOCATIONS.add("buôn đôn"); + VN_LOCATIONS.add("đắk nông"); + VN_LOCATIONS.add("lý sơn"); + VN_LOCATIONS.add("sóc trăng"); + VN_LOCATIONS.add("hoằng hoá"); + VN_LOCATIONS.add("quận 10"); + VN_LOCATIONS.add("krông ana"); + VN_LOCATIONS.add("quận 11"); + VN_LOCATIONS.add("quận 12"); + VN_LOCATIONS.add("phan rang-tháp chàm"); + VN_LOCATIONS.add("tân kỳ"); + VN_LOCATIONS.add("tương dương"); + VN_LOCATIONS.add("đan phượng"); + VN_LOCATIONS.add("anh sơn"); + VN_LOCATIONS.add("quận 2"); + VN_LOCATIONS.add("quận 1"); + VN_LOCATIONS.add("qui nhơn"); + VN_LOCATIONS.add("tư nghĩa"); + VN_LOCATIONS.add("bố trạch"); + VN_LOCATIONS.add("quận 9"); + VN_LOCATIONS.add("thạch an"); + VN_LOCATIONS.add("bảo thắng"); + VN_LOCATIONS.add("quận 8"); + VN_LOCATIONS.add("quận 7"); + VN_LOCATIONS.add("nghĩa hành"); + VN_LOCATIONS.add("quận 6"); + VN_LOCATIONS.add("quận 5"); + VN_LOCATIONS.add("hội an"); + VN_LOCATIONS.add("quận 4"); + VN_LOCATIONS.add("quận 3"); + VN_LOCATIONS.add("phong điền"); + VN_LOCATIONS.add("xuân lộc"); + VN_LOCATIONS.add("côn đảo"); + VN_LOCATIONS.add("nha trang"); + VN_LOCATIONS.add("tân lạc"); + VN_LOCATIONS.add("hạ hoà"); + VN_LOCATIONS.add("gia viễn"); + VN_LOCATIONS.add("đồng tháp"); + VN_LOCATIONS.add("hoành bồ"); + VN_LOCATIONS.add("bắc quang"); + VN_LOCATIONS.add("na rì"); + VN_LOCATIONS.add("sông cầu"); + VN_LOCATIONS.add("mường tè"); + VN_LOCATIONS.add("yên phong"); + VN_LOCATIONS.add("tứ kỳ"); + VN_LOCATIONS.add("vũ thư"); + VN_LOCATIONS.add("mỹ hào"); + VN_LOCATIONS.add("chư prông"); + VN_LOCATIONS.add("hóc môn"); + VN_LOCATIONS.add("châu đốc"); + VN_LOCATIONS.add("đô lương"); + VN_LOCATIONS.add("mang thít"); + VN_LOCATIONS.add("tràng định"); + VN_LOCATIONS.add("cam ranh"); + VN_LOCATIONS.add("mang yang"); + VN_LOCATIONS.add("hàm thuận nam"); + VN_LOCATIONS.add("hưng nguyên"); + VN_LOCATIONS.add("kiến xương"); + VN_LOCATIONS.add("ninh phước"); + VN_LOCATIONS.add("phong thổ"); + VN_LOCATIONS.add("đức thọ"); + VN_LOCATIONS.add("hồng lĩnh"); + VN_LOCATIONS.add("khánh vĩnh"); + VN_LOCATIONS.add("mỹ lộc"); + VN_LOCATIONS.add("ngọc hiển"); + VN_LOCATIONS.add("phước sơn"); + VN_LOCATIONS.add("hà đông"); + VN_LOCATIONS.add("lào cai"); + VN_LOCATIONS.add("vĩnh yên"); + VN_LOCATIONS.add("quỳ châu"); + VN_LOCATIONS.add("sơn động"); + VN_LOCATIONS.add("bến cầu"); + VN_LOCATIONS.add("đông anh"); + VN_LOCATIONS.add("kông chro"); + VN_LOCATIONS.add("trảng bom"); + VN_LOCATIONS.add("đông triều"); + VN_LOCATIONS.add("ba tơ"); + VN_LOCATIONS.add("cù lao dung"); + VN_LOCATIONS.add("mỹ xuyên"); + VN_LOCATIONS.add("quảng hà"); + VN_LOCATIONS.add("tân biên"); + VN_LOCATIONS.add("bá thước"); + VN_LOCATIONS.add("cà mau"); + VN_LOCATIONS.add("chi lăng"); + VN_LOCATIONS.add("yên bình"); + VN_LOCATIONS.add("bình minh"); + VN_LOCATIONS.add("bình dương"); + VN_LOCATIONS.add("an nhơn"); + VN_LOCATIONS.add("chư păh"); + VN_LOCATIONS.add("việt nam"); + VN_LOCATIONS.add("giồng riềng"); + VN_LOCATIONS.add("cát tiên"); + VN_LOCATIONS.add("thuận an"); + VN_LOCATIONS.add("ngã 
năm"); + VN_LOCATIONS.add("cẩm thuỷ"); + VN_LOCATIONS.add("minh long"); + VN_LOCATIONS.add("nam đàn"); + VN_LOCATIONS.add("tân hiệp"); + VN_LOCATIONS.add("thanh sơn"); + VN_LOCATIONS.add("dĩ an"); + VN_LOCATIONS.add("thuận thành"); + VN_LOCATIONS.add("điện biên phủ"); + VN_LOCATIONS.add("vạn ninh"); + VN_LOCATIONS.add("hưng yê"); + VN_LOCATIONS.add("thái thuỵ"); + VN_LOCATIONS.add("thanh xuân"); + VN_LOCATIONS.add("cần giờ"); + VN_LOCATIONS.add("ngũ hành sơn"); + VN_LOCATIONS.add("ba tri"); + VN_LOCATIONS.add("hồng dân"); + VN_LOCATIONS.add("ninh giang"); + VN_LOCATIONS.add("phan rang tháp chàm"); + VN_LOCATIONS.add("than uyên"); + VN_LOCATIONS.add("phú lộc"); + VN_LOCATIONS.add("thanh chương"); + VN_LOCATIONS.add("lục ngạn"); + VN_LOCATIONS.add("năm căn"); + VN_LOCATIONS.add("điện biên đông"); + VN_LOCATIONS.add("hữu lũng"); + VN_LOCATIONS.add("hoàng su phì"); + VN_LOCATIONS.add("tây hồ"); + VN_LOCATIONS.add("bắc yên"); + VN_LOCATIONS.add("sài gòn"); + VN_LOCATIONS.add("vĩnh cửu"); + VN_LOCATIONS.add("bình phước"); + VN_LOCATIONS.add("nam sách"); + VN_LOCATIONS.add("hưng hà"); + VN_LOCATIONS.add("bình chánh"); + VN_LOCATIONS.add("uông bí"); + VN_LOCATIONS.add("ea h'leo"); + VN_LOCATIONS.add("tam điệp"); + VN_LOCATIONS.add("nam giang"); + VN_LOCATIONS.add("trùng khánh"); + VN_LOCATIONS.add("gia lộc"); + VN_LOCATIONS.add("tam dương"); + VN_LOCATIONS.add("hoà an"); + VN_LOCATIONS.add("thừa thiên huế"); + VN_LOCATIONS.add("nông cống"); + VN_LOCATIONS.add("tam kỳ"); + VN_LOCATIONS.add("đak pơ"); + VN_LOCATIONS.add("bình thạnh"); + VN_LOCATIONS.add("hà nội"); + VN_LOCATIONS.add("châu thành"); + VN_LOCATIONS.add("tiên lữ"); + VN_LOCATIONS.add("cầu kè"); + VN_LOCATIONS.add("ninh kiều"); + VN_LOCATIONS.add("buôn ma thuột"); + VN_LOCATIONS.add("an khê"); + VN_LOCATIONS.add("đức huệ"); + VN_LOCATIONS.add("tiền hải"); + VN_LOCATIONS.add("tuy phước"); + VN_LOCATIONS.add("bà rịa"); + VN_LOCATIONS.add("đa krông"); + VN_LOCATIONS.add("đồng xoài"); + VN_LOCATIONS.add("ba vì"); + VN_LOCATIONS.add("quảng ninh"); + VN_LOCATIONS.add("điện biên"); + VN_LOCATIONS.add("hà trung"); + VN_LOCATIONS.add("thanh oai"); + VN_LOCATIONS.add("trà cú"); + VN_LOCATIONS.add("văn yên"); + VN_LOCATIONS.add("bình xuyên"); + VN_LOCATIONS.add("hoà vang"); + VN_LOCATIONS.add("trà lĩnh"); + VN_LOCATIONS.add("yên khánh"); + VN_LOCATIONS.add("kbang"); + VN_LOCATIONS.add("hoàng sa"); + VN_LOCATIONS.add("văn quan"); + VN_LOCATIONS.add("ba chẽ"); + VN_LOCATIONS.add("nho quan"); + VN_LOCATIONS.add("khánh hoà"); + VN_LOCATIONS.add("đăk mil"); + VN_LOCATIONS.add("kiến thuỵ"); + VN_LOCATIONS.add("đầm hà"); + VN_LOCATIONS.add("hàm tân"); + VN_LOCATIONS.add("phù cát"); + VN_LOCATIONS.add("kim bảng"); + VN_LOCATIONS.add("vũng tầu"); + VN_LOCATIONS.add("kiên giang"); + VN_LOCATIONS.add("long hồ"); + VN_LOCATIONS.add("mường chà"); + VN_LOCATIONS.add("thanh ba"); + VN_LOCATIONS.add("đại lộc"); + VN_LOCATIONS.add("mê linh"); + VN_LOCATIONS.add("mường lát"); + VN_LOCATIONS.add("đạ huoai"); + VN_LOCATIONS.add("huế"); + VN_LOCATIONS.add("cần thơ"); + VN_LOCATIONS.add("vụ bản"); + VN_LOCATIONS.add("thanh liêm"); + VN_LOCATIONS.add("đoan hùng"); + VN_LOCATIONS.add("hiệp hoà"); + VN_LOCATIONS.add("bắc sơn"); + VN_LOCATIONS.add("tân trụ"); + VN_LOCATIONS.add("cần giuộc"); + VN_LOCATIONS.add("đăk glong"); + VN_LOCATIONS.add("hậu lộc"); + VN_LOCATIONS.add("kỳ anh"); + VN_LOCATIONS.add("cai lậy"); + VN_LOCATIONS.add("krông bông"); + VN_LOCATIONS.add("yên lập"); + VN_LOCATIONS.add("mù căng chải"); + VN_LOCATIONS.add("mỹ tú"); + VN_LOCATIONS.add("trạm tấu"); 
+ VN_LOCATIONS.add("cư jút"); + VN_LOCATIONS.add("quỳ hợp"); + VN_LOCATIONS.add("tân phước"); + VN_LOCATIONS.add("vĩnh lợi"); + VN_LOCATIONS.add("đồng văn"); + VN_LOCATIONS.add("đông sơn"); + VN_LOCATIONS.add("tây trà"); + VN_LOCATIONS.add("lộc ninh"); + VN_LOCATIONS.add("sầm sơn"); + VN_LOCATIONS.add("lạng sơn"); + VN_LOCATIONS.add("sa pa"); + VN_LOCATIONS.add("hàm yên"); + VN_LOCATIONS.add("vân đồn"); + VN_LOCATIONS.add("đà bắc"); + VN_LOCATIONS.add("vân canh"); + VN_LOCATIONS.add("sơn hoà"); + VN_LOCATIONS.add("thuận bắc"); + VN_LOCATIONS.add("châu đức"); + VN_LOCATIONS.add("thường xuân"); + VN_LOCATIONS.add("định hoá"); + VN_LOCATIONS.add("giồng trôm"); + VN_LOCATIONS.add("núi thành"); + VN_LOCATIONS.add("rạch giá"); + VN_LOCATIONS.add("con cuông"); + VN_LOCATIONS.add("ninh bình"); + VN_LOCATIONS.add("đồng hới"); + VN_LOCATIONS.add("tân an"); + VN_LOCATIONS.add("trực ninh"); + VN_LOCATIONS.add("thuận châu"); + VN_LOCATIONS.add("vinh"); + VN_LOCATIONS.add("trần văn thời"); + VN_LOCATIONS.add("minh hoá"); + VN_LOCATIONS.add("yên mỹ"); + VN_LOCATIONS.add("quan hoá"); + VN_LOCATIONS.add("văn bàn"); + VN_LOCATIONS.add("cam lộ"); + VN_LOCATIONS.add("lang chánh"); + VN_LOCATIONS.add("phù yên"); + VN_LOCATIONS.add("đăk tô"); + VN_LOCATIONS.add("hoa lư"); + VN_LOCATIONS.add("lục yên"); + VN_LOCATIONS.add("đức phổ"); + VN_LOCATIONS.add("hà nam"); + VN_LOCATIONS.add("tuy an"); + VN_LOCATIONS.add("an giang"); + VN_LOCATIONS.add("ba bể"); + VN_LOCATIONS.add("xuân trường"); + VN_LOCATIONS.add("cát hải"); + VN_LOCATIONS.add("kon tum"); + VN_LOCATIONS.add("bù đăng"); + VN_LOCATIONS.add("krông năng"); + VN_LOCATIONS.add("an biên"); + VN_LOCATIONS.add("yên châu"); + VN_LOCATIONS.add("phú thọ"); + VN_LOCATIONS.add("tam nông"); + VN_LOCATIONS.add("quỳnh lưu"); + VN_LOCATIONS.add("đình lập"); + VN_LOCATIONS.add("nghi lộc"); + VN_LOCATIONS.add("chợ mới"); + VN_LOCATIONS.add("đức trọng"); + VN_LOCATIONS.add("đầm dơi"); + VN_LOCATIONS.add("long đất"); + VN_LOCATIONS.add("mường lay"); + VN_LOCATIONS.add("tiền giang"); + VN_LOCATIONS.add("thông nông"); + VN_LOCATIONS.add("phú yên"); + VN_LOCATIONS.add("quảng bình"); + VN_LOCATIONS.add("sìn hồ"); + VN_LOCATIONS.add("tuy phong"); + VN_LOCATIONS.add("ba đình"); + VN_LOCATIONS.add("phù mỹ"); + VN_LOCATIONS.add("đức hoà"); + VN_LOCATIONS.add("bảo lạc"); + VN_LOCATIONS.add("đăk glei"); + VN_LOCATIONS.add("bến tre"); + VN_LOCATIONS.add("như thanh"); + VN_LOCATIONS.add("thanh thuỷ"); + VN_LOCATIONS.add("đà lạt"); + VN_LOCATIONS.add("đức cơ"); + VN_LOCATIONS.add("văn chấn"); + VN_LOCATIONS.add("bà rịa vũng tàu"); + VN_LOCATIONS.add("vĩnh hưng"); + VN_LOCATIONS.add("cao phong"); + VN_LOCATIONS.add("nam trà my"); + VN_LOCATIONS.add("phú giáo"); + VN_LOCATIONS.add("phú bình"); + VN_LOCATIONS.add("ayun pa"); + VN_LOCATIONS.add("mỹ đức"); + VN_LOCATIONS.add("tuần giáo"); + } + + public static Set VN_FIRST_SENT_WORDS; + static { + VN_FIRST_SENT_WORDS = new HashSet(); + VN_FIRST_SENT_WORDS.add("được"); + VN_FIRST_SENT_WORDS.add("cty"); + VN_FIRST_SENT_WORDS.add("mẹ"); + VN_FIRST_SENT_WORDS.add("trừ"); + VN_FIRST_SENT_WORDS.add("lên"); + VN_FIRST_SENT_WORDS.add("trưởng"); + VN_FIRST_SENT_WORDS.add("là"); + VN_FIRST_SENT_WORDS.add("chàng"); + VN_FIRST_SENT_WORDS.add("theo"); + VN_FIRST_SENT_WORDS.add("tên"); + VN_FIRST_SENT_WORDS.add("giờ"); + VN_FIRST_SENT_WORDS.add("biết"); + VN_FIRST_SENT_WORDS.add("già"); + VN_FIRST_SENT_WORDS.add("những"); + VN_FIRST_SENT_WORDS.add("thấy"); + VN_FIRST_SENT_WORDS.add("thương"); + VN_FIRST_SENT_WORDS.add("lang"); + 
VN_FIRST_SENT_WORDS.add("gái"); + VN_FIRST_SENT_WORDS.add("mà"); + VN_FIRST_SENT_WORDS.add("xóm"); + VN_FIRST_SENT_WORDS.add("má"); + VN_FIRST_SENT_WORDS.add("cầu"); + VN_FIRST_SENT_WORDS.add("khách"); + VN_FIRST_SENT_WORDS.add("nhánh"); + VN_FIRST_SENT_WORDS.add("hôm"); + VN_FIRST_SENT_WORDS.add("nhớ"); + VN_FIRST_SENT_WORDS.add("hạng"); + VN_FIRST_SENT_WORDS.add("huyện"); + VN_FIRST_SENT_WORDS.add("vậy"); + VN_FIRST_SENT_WORDS.add("nhà"); + VN_FIRST_SENT_WORDS.add("ấp"); + VN_FIRST_SENT_WORDS.add("sông"); + VN_FIRST_SENT_WORDS.add("thằng"); + VN_FIRST_SENT_WORDS.add("nài"); + VN_FIRST_SENT_WORDS.add("ngành"); + VN_FIRST_SENT_WORDS.add("nếu"); + VN_FIRST_SENT_WORDS.add("trời"); + VN_FIRST_SENT_WORDS.add("đảng"); + VN_FIRST_SENT_WORDS.add("vào"); + VN_FIRST_SENT_WORDS.add("thầy"); + VN_FIRST_SENT_WORDS.add("hai"); + VN_FIRST_SENT_WORDS.add("vùng"); + VN_FIRST_SENT_WORDS.add("chuyện"); + VN_FIRST_SENT_WORDS.add("nhìn"); + VN_FIRST_SENT_WORDS.add("tim"); + VN_FIRST_SENT_WORDS.add("cha"); + VN_FIRST_SENT_WORDS.add("sang"); + VN_FIRST_SENT_WORDS.add("bên"); + VN_FIRST_SENT_WORDS.add("đường"); + VN_FIRST_SENT_WORDS.add("cho"); + VN_FIRST_SENT_WORDS.add("bảng"); + VN_FIRST_SENT_WORDS.add("khi"); + VN_FIRST_SENT_WORDS.add("quận"); + VN_FIRST_SENT_WORDS.add("biển"); + VN_FIRST_SENT_WORDS.add("cu"); + VN_FIRST_SENT_WORDS.add("metro"); + VN_FIRST_SENT_WORDS.add("vốn"); + VN_FIRST_SENT_WORDS.add("đến"); + VN_FIRST_SENT_WORDS.add("năm"); + VN_FIRST_SENT_WORDS.add("khu"); + VN_FIRST_SENT_WORDS.add("đài"); + VN_FIRST_SENT_WORDS.add("miền"); + VN_FIRST_SENT_WORDS.add("việc"); + VN_FIRST_SENT_WORDS.add("do"); + VN_FIRST_SENT_WORDS.add("lập"); + VN_FIRST_SENT_WORDS.add("nghe"); + VN_FIRST_SENT_WORDS.add("mắt"); + VN_FIRST_SENT_WORDS.add("viện"); + VN_FIRST_SENT_WORDS.add("cả"); + VN_FIRST_SENT_WORDS.add("em"); + VN_FIRST_SENT_WORDS.add("rừng"); + VN_FIRST_SENT_WORDS.add("liệu"); + VN_FIRST_SENT_WORDS.add("bố"); + VN_FIRST_SENT_WORDS.add("bộ"); + VN_FIRST_SENT_WORDS.add("cháu"); + VN_FIRST_SENT_WORDS.add("riêng"); + VN_FIRST_SENT_WORDS.add("bà"); + VN_FIRST_SENT_WORDS.add("số"); + VN_FIRST_SENT_WORDS.add("chị"); + VN_FIRST_SENT_WORDS.add("người"); + VN_FIRST_SENT_WORDS.add("bé"); + VN_FIRST_SENT_WORDS.add("tàu"); + VN_FIRST_SENT_WORDS.add("làng"); + VN_FIRST_SENT_WORDS.add("cảng"); + VN_FIRST_SENT_WORDS.add("sở"); + VN_FIRST_SENT_WORDS.add("chiếc"); + VN_FIRST_SENT_WORDS.add("tết"); + VN_FIRST_SENT_WORDS.add("cậu"); + VN_FIRST_SENT_WORDS.add("luật"); + VN_FIRST_SENT_WORDS.add("chờ"); + VN_FIRST_SENT_WORDS.add("rời"); + VN_FIRST_SENT_WORDS.add("chắc"); + VN_FIRST_SENT_WORDS.add("hội"); + VN_FIRST_SENT_WORDS.add("chợ"); + VN_FIRST_SENT_WORDS.add("viên"); + VN_FIRST_SENT_WORDS.add("cụ"); + VN_FIRST_SENT_WORDS.add("nay"); + VN_FIRST_SENT_WORDS.add("thuốc"); + VN_FIRST_SENT_WORDS.add("bọn"); + VN_FIRST_SENT_WORDS.add("tờ"); + VN_FIRST_SENT_WORDS.add("phía"); + VN_FIRST_SENT_WORDS.add("chữ"); + VN_FIRST_SENT_WORDS.add("xe"); + VN_FIRST_SENT_WORDS.add("cò"); + VN_FIRST_SENT_WORDS.add("có"); + VN_FIRST_SENT_WORDS.add("cô"); + VN_FIRST_SENT_WORDS.add("dân"); + VN_FIRST_SENT_WORDS.add("nhóm"); + VN_FIRST_SENT_WORDS.add("song"); + VN_FIRST_SENT_WORDS.add("chú"); + VN_FIRST_SENT_WORDS.add("từ"); + VN_FIRST_SENT_WORDS.add("như"); + VN_FIRST_SENT_WORDS.add("ngày"); + VN_FIRST_SENT_WORDS.add("phim"); + VN_FIRST_SENT_WORDS.add("chính"); + VN_FIRST_SENT_WORDS.add("tân"); + VN_FIRST_SENT_WORDS.add("gặp"); + VN_FIRST_SENT_WORDS.add("các"); + VN_FIRST_SENT_WORDS.add("quê"); + VN_FIRST_SENT_WORDS.add("dì"); + 
VN_FIRST_SENT_WORDS.add("bởi"); + VN_FIRST_SENT_WORDS.add("quí"); + VN_FIRST_SENT_WORDS.add("về"); + VN_FIRST_SENT_WORDS.add("trại"); + VN_FIRST_SENT_WORDS.add("tại"); + VN_FIRST_SENT_WORDS.add("lão"); + VN_FIRST_SENT_WORDS.add("đảo"); + VN_FIRST_SENT_WORDS.add("nguyên"); + VN_FIRST_SENT_WORDS.add("còn"); + VN_FIRST_SENT_WORDS.add("tiếng"); + VN_FIRST_SENT_WORDS.add("dòng"); + VN_FIRST_SENT_WORDS.add("và"); + VN_FIRST_SENT_WORDS.add("hiện"); + VN_FIRST_SENT_WORDS.add("vợ"); + VN_FIRST_SENT_WORDS.add("thuyền"); + VN_FIRST_SENT_WORDS.add("vụ"); + VN_FIRST_SENT_WORDS.add("đoàn"); + VN_FIRST_SENT_WORDS.add("thành"); + VN_FIRST_SENT_WORDS.add("giới"); + VN_FIRST_SENT_WORDS.add("bến"); + VN_FIRST_SENT_WORDS.add("vì"); + VN_FIRST_SENT_WORDS.add("đi"); + VN_FIRST_SENT_WORDS.add("sân"); + VN_FIRST_SENT_WORDS.add("sâm"); + VN_FIRST_SENT_WORDS.add("con"); + VN_FIRST_SENT_WORDS.add("bác"); + VN_FIRST_SENT_WORDS.add("cùng"); + VN_FIRST_SENT_WORDS.add("báo"); + VN_FIRST_SENT_WORDS.add("chồng"); + VN_FIRST_SENT_WORDS.add("hàng"); + VN_FIRST_SENT_WORDS.add("đất"); + VN_FIRST_SENT_WORDS.add("mỗi"); + VN_FIRST_SENT_WORDS.add("núi"); + VN_FIRST_SENT_WORDS.add("phòng"); + VN_FIRST_SENT_WORDS.add("xã"); + VN_FIRST_SENT_WORDS.add("hồ"); + VN_FIRST_SENT_WORDS.add("ông"); + VN_FIRST_SENT_WORDS.add("giọng"); + VN_FIRST_SENT_WORDS.add("trường"); + VN_FIRST_SENT_WORDS.add("đèo"); + VN_FIRST_SENT_WORDS.add("trùm"); + VN_FIRST_SENT_WORDS.add("nhiều"); + VN_FIRST_SENT_WORDS.add("thư"); + VN_FIRST_SENT_WORDS.add("cục"); + VN_FIRST_SENT_WORDS.add("nước"); + VN_FIRST_SENT_WORDS.add("thôn"); + VN_FIRST_SENT_WORDS.add("bạn"); + VN_FIRST_SENT_WORDS.add("nàng"); + VN_FIRST_SENT_WORDS.add("bệnh"); + VN_FIRST_SENT_WORDS.add("cụm"); + VN_FIRST_SENT_WORDS.add("tướng"); + VN_FIRST_SENT_WORDS.add("buôn"); + VN_FIRST_SENT_WORDS.add("để"); + VN_FIRST_SENT_WORDS.add("anh"); + VN_FIRST_SENT_WORDS.add("lính"); + VN_FIRST_SENT_WORDS.add("với"); + VN_FIRST_SENT_WORDS.add("ngoài"); + VN_FIRST_SENT_WORDS.add("trên"); + VN_FIRST_SENT_WORDS.add("hỏi"); + VN_FIRST_SENT_WORDS.add("sau"); + VN_FIRST_SENT_WORDS.add("đội"); + VN_FIRST_SENT_WORDS.add("gọi"); + VN_FIRST_SENT_WORDS.add("rồi"); + VN_FIRST_SENT_WORDS.add("một"); + VN_FIRST_SENT_WORDS.add("chúc"); + VN_FIRST_SENT_WORDS.add("nhưng"); + VN_FIRST_SENT_WORDS.add("đêm"); + VN_FIRST_SENT_WORDS.add("phó"); + VN_FIRST_SENT_WORDS.add("bỗng"); + VN_FIRST_SENT_WORDS.add("trong"); + VN_FIRST_SENT_WORDS.add("trước"); + VN_FIRST_SENT_WORDS.add("bản"); + VN_FIRST_SENT_WORDS.add("cuốn"); + VN_FIRST_SENT_WORDS.add("chùa"); + VN_FIRST_SENT_WORDS.add("ban"); + VN_FIRST_SENT_WORDS.add("giữa"); + VN_FIRST_SENT_WORDS.add("ngay"); + VN_FIRST_SENT_WORDS.add("lúc"); + VN_FIRST_SENT_WORDS.add("tỉnh"); + VN_FIRST_SENT_WORDS.add("tuy"); + VN_FIRST_SENT_WORDS.add("vẫn"); + + VN_FIRST_SENT_WORDS.add("trà"); + VN_FIRST_SENT_WORDS.add("ôi"); + VN_FIRST_SENT_WORDS.add("cặp"); + VN_FIRST_SENT_WORDS.add("taxi"); + VN_FIRST_SENT_WORDS.add("nhiễm"); + VN_FIRST_SENT_WORDS.add("virus"); + VN_FIRST_SENT_WORDS.add("hồi"); + VN_FIRST_SENT_WORDS.add("nghĩa"); + VN_FIRST_SENT_WORDS.add("đọc"); + VN_FIRST_SENT_WORDS.add("nhờ"); + VN_FIRST_SENT_WORDS.add("tới"); + VN_FIRST_SENT_WORDS.add("ong"); + VN_FIRST_SENT_WORDS.add("website"); + VN_FIRST_SENT_WORDS.add("bóng"); + VN_FIRST_SENT_WORDS.add("quít"); + VN_FIRST_SENT_WORDS.add("kungfu"); + VN_FIRST_SENT_WORDS.add("ra"); + VN_FIRST_SENT_WORDS.add("đồng"); + VN_FIRST_SENT_WORDS.add("băng"); + VN_FIRST_SENT_WORDS.add("ba"); + VN_FIRST_SENT_WORDS.add("bầu"); + 
VN_FIRST_SENT_WORDS.add("hay"); + VN_FIRST_SENT_WORDS.add("giải"); + VN_FIRST_SENT_WORDS.add("giao"); + VN_FIRST_SENT_WORDS.add("cửa"); + VN_FIRST_SENT_WORDS.add("phần"); + VN_FIRST_SENT_WORDS.add("sinh"); + VN_FIRST_SENT_WORDS.add("vietcombank"); + VN_FIRST_SENT_WORDS.add("vàng"); + VN_FIRST_SENT_WORDS.add("fred"); + VN_FIRST_SENT_WORDS.add("tập"); + VN_FIRST_SENT_WORDS.add("toyota"); + VN_FIRST_SENT_WORDS.add("bế"); + VN_FIRST_SENT_WORDS.add("tuồng"); + VN_FIRST_SENT_WORDS.add("nguồn"); + VN_FIRST_SENT_WORDS.add("phường"); + VN_FIRST_SENT_WORDS.add("làm"); + VN_FIRST_SENT_WORDS.add("tuyển"); + VN_FIRST_SENT_WORDS.add("đền"); + VN_FIRST_SENT_WORDS.add("mong"); + VN_FIRST_SENT_WORDS.add("nghỉ"); + VN_FIRST_SENT_WORDS.add("hầm"); + VN_FIRST_SENT_WORDS.add("trán"); + VN_FIRST_SENT_WORDS.add("dắt"); + VN_FIRST_SENT_WORDS.add("sợ"); + VN_FIRST_SENT_WORDS.add("chỗ"); + VN_FIRST_SENT_WORDS.add("lái"); + VN_FIRST_SENT_WORDS.add("xem"); + VN_FIRST_SENT_WORDS.add("chủ"); + VN_FIRST_SENT_WORDS.add("chứ"); + VN_FIRST_SENT_WORDS.add("đợt"); + VN_FIRST_SENT_WORDS.add("đoạn"); + VN_FIRST_SENT_WORDS.add("đồn"); + VN_FIRST_SENT_WORDS.add("trục"); + VN_FIRST_SENT_WORDS.add("tự"); + VN_FIRST_SENT_WORDS.add("neil"); + VN_FIRST_SENT_WORDS.add("điện"); + VN_FIRST_SENT_WORDS.add("trạm"); + VN_FIRST_SENT_WORDS.add("gần"); + VN_FIRST_SENT_WORDS.add("giặc"); + VN_FIRST_SENT_WORDS.add("cúng"); + VN_FIRST_SENT_WORDS.add("dù"); + VN_FIRST_SENT_WORDS.add("vịnh"); + VN_FIRST_SENT_WORDS.add("quân"); + VN_FIRST_SENT_WORDS.add("dãy"); + VN_FIRST_SENT_WORDS.add("pha"); + VN_FIRST_SENT_WORDS.add("toàn"); + VN_FIRST_SENT_WORDS.add("tháp"); + VN_FIRST_SENT_WORDS.add("quĩ"); + VN_FIRST_SENT_WORDS.add("đĩa"); + VN_FIRST_SENT_WORDS.add("gà"); + VN_FIRST_SENT_WORDS.add("lao"); + VN_FIRST_SENT_WORDS.add("bốn"); + VN_FIRST_SENT_WORDS.add("họ"); + VN_FIRST_SENT_WORDS.add("họp"); + VN_FIRST_SENT_WORDS.add("đèn"); + VN_FIRST_SENT_WORDS.add("cũng"); + VN_FIRST_SENT_WORDS.add("động"); + VN_FIRST_SENT_WORDS.add("mặt"); + VN_FIRST_SENT_WORDS.add("đầm"); + VN_FIRST_SENT_WORDS.add("cống"); + VN_FIRST_SENT_WORDS.add("nơi"); + VN_FIRST_SENT_WORDS.add("tùng"); + VN_FIRST_SENT_WORDS.add("phố"); + VN_FIRST_SENT_WORDS.add("đầu"); + VN_FIRST_SENT_WORDS.add("vượt"); + VN_FIRST_SENT_WORDS.add("sao"); + VN_FIRST_SENT_WORDS.add("cách"); + VN_FIRST_SENT_WORDS.add("hoặc"); + VN_FIRST_SENT_WORDS.add("của"); + VN_FIRST_SENT_WORDS.add("hết"); + VN_FIRST_SENT_WORDS.add("đỉnh"); + VN_FIRST_SENT_WORDS.add("kênh"); + VN_FIRST_SENT_WORDS.add("quyền"); + VN_FIRST_SENT_WORDS.add("bar"); + VN_FIRST_SENT_WORDS.add("chống"); + VN_FIRST_SENT_WORDS.add("khắp"); + VN_FIRST_SENT_WORDS.add("sách"); + VN_FIRST_SENT_WORDS.add("wikipedia"); + } + + public static Set VN_MIDDLE_NAMES; + static { + VN_MIDDLE_NAMES = new HashSet(); + VN_MIDDLE_NAMES.add("thúy"); + VN_MIDDLE_NAMES.add("bao"); + VN_MIDDLE_NAMES.add("thùy"); + VN_MIDDLE_NAMES.add("mạnh"); + VN_MIDDLE_NAMES.add("mỹ"); + VN_MIDDLE_NAMES.add("an"); + VN_MIDDLE_NAMES.add("hoa"); + VN_MIDDLE_NAMES.add("nữ"); + VN_MIDDLE_NAMES.add("trường"); + VN_MIDDLE_NAMES.add("vĩnh"); + VN_MIDDLE_NAMES.add("đắc"); + VN_MIDDLE_NAMES.add("minh"); + VN_MIDDLE_NAMES.add("thanh"); + VN_MIDDLE_NAMES.add("thi"); + VN_MIDDLE_NAMES.add("thu"); + VN_MIDDLE_NAMES.add("ninh"); + VN_MIDDLE_NAMES.add("đình"); + VN_MIDDLE_NAMES.add("hải"); + VN_MIDDLE_NAMES.add("tuấn"); + VN_MIDDLE_NAMES.add("bội"); + VN_MIDDLE_NAMES.add("thuý"); + VN_MIDDLE_NAMES.add("việt"); + VN_MIDDLE_NAMES.add("nguyễn"); + VN_MIDDLE_NAMES.add("bá"); + 
VN_MIDDLE_NAMES.add("phương"); + VN_MIDDLE_NAMES.add("bé"); + VN_MIDDLE_NAMES.add("tố"); + VN_MIDDLE_NAMES.add("quốc"); + VN_MIDDLE_NAMES.add("nguyệt"); + VN_MIDDLE_NAMES.add("tử"); + VN_MIDDLE_NAMES.add("cảnh"); + VN_MIDDLE_NAMES.add("trọng"); + VN_MIDDLE_NAMES.add("huy"); + VN_MIDDLE_NAMES.add("nam"); + VN_MIDDLE_NAMES.add("chí"); + VN_MIDDLE_NAMES.add("thái"); + VN_MIDDLE_NAMES.add("thành"); + VN_MIDDLE_NAMES.add("chính"); + VN_MIDDLE_NAMES.add("đinh"); + VN_MIDDLE_NAMES.add("mai"); + VN_MIDDLE_NAMES.add("thiên"); + VN_MIDDLE_NAMES.add("tôn"); + VN_MIDDLE_NAMES.add("phi"); + VN_MIDDLE_NAMES.add("hà"); + VN_MIDDLE_NAMES.add("khắc"); + VN_MIDDLE_NAMES.add("trúc"); + VN_MIDDLE_NAMES.add("lan"); + VN_MIDDLE_NAMES.add("doãn"); + VN_MIDDLE_NAMES.add("nhất"); + VN_MIDDLE_NAMES.add("huỳnh"); + VN_MIDDLE_NAMES.add("quỳnh"); + VN_MIDDLE_NAMES.add("diễm"); + VN_MIDDLE_NAMES.add("khánh"); + VN_MIDDLE_NAMES.add("hữu"); + VN_MIDDLE_NAMES.add("tấn"); + VN_MIDDLE_NAMES.add("anh"); + VN_MIDDLE_NAMES.add("hoành"); + VN_MIDDLE_NAMES.add("hoàng"); + VN_MIDDLE_NAMES.add("diệu"); + VN_MIDDLE_NAMES.add("lê"); + VN_MIDDLE_NAMES.add("phú"); + VN_MIDDLE_NAMES.add("duy"); + VN_MIDDLE_NAMES.add("bảo"); + VN_MIDDLE_NAMES.add("huyền"); + VN_MIDDLE_NAMES.add("nguyên"); + VN_MIDDLE_NAMES.add("bích"); + VN_MIDDLE_NAMES.add("ánh"); + VN_MIDDLE_NAMES.add("công"); + VN_MIDDLE_NAMES.add("mộng"); + VN_MIDDLE_NAMES.add("lệnh"); + VN_MIDDLE_NAMES.add("cẩm"); + VN_MIDDLE_NAMES.add("phúc"); + VN_MIDDLE_NAMES.add("nhật"); + VN_MIDDLE_NAMES.add("ngọc"); + VN_MIDDLE_NAMES.add("thời"); + VN_MIDDLE_NAMES.add("sơn"); + VN_MIDDLE_NAMES.add("thuỳ"); + VN_MIDDLE_NAMES.add("văn"); + VN_MIDDLE_NAMES.add("vân"); + VN_MIDDLE_NAMES.add("qui"); + VN_MIDDLE_NAMES.add("hồng"); + VN_MIDDLE_NAMES.add("thế"); + VN_MIDDLE_NAMES.add("kim"); + VN_MIDDLE_NAMES.add("thị"); + VN_MIDDLE_NAMES.add("danh"); + VN_MIDDLE_NAMES.add("hoài"); + VN_MIDDLE_NAMES.add("tiến"); + VN_MIDDLE_NAMES.add("tú"); + VN_MIDDLE_NAMES.add("bửu"); + VN_MIDDLE_NAMES.add("trung"); + VN_MIDDLE_NAMES.add("thiện"); + VN_MIDDLE_NAMES.add("tuyết"); + VN_MIDDLE_NAMES.add("đăng"); + VN_MIDDLE_NAMES.add("như"); + VN_MIDDLE_NAMES.add("hùng"); + VN_MIDDLE_NAMES.add("vô"); + VN_MIDDLE_NAMES.add("miên"); + VN_MIDDLE_NAMES.add("quý"); + VN_MIDDLE_NAMES.add("quang"); + VN_MIDDLE_NAMES.add("đức"); + VN_MIDDLE_NAMES.add("ưng"); + VN_MIDDLE_NAMES.add("tường"); + VN_MIDDLE_NAMES.add("kiều"); + VN_MIDDLE_NAMES.add("thảo"); + VN_MIDDLE_NAMES.add("xuân"); + VN_MIDDLE_NAMES.add("viết"); + VN_MIDDLE_NAMES.add("vũ"); + VN_MIDDLE_NAMES.add("kế"); + VN_MIDDLE_NAMES.add("gia"); + VN_MIDDLE_NAMES.add("phước"); + VN_MIDDLE_NAMES.add("linh"); + VN_MIDDLE_NAMES.add("cao"); + VN_MIDDLE_NAMES.add("lệ"); + } + + public static Set VN_FAMILY_NAMES; + static { + VN_FAMILY_NAMES = new HashSet(); + VN_FAMILY_NAMES.add("bảo"); + VN_FAMILY_NAMES.add("phan"); + VN_FAMILY_NAMES.add("lý"); + VN_FAMILY_NAMES.add("bao"); + VN_FAMILY_NAMES.add("huyền"); + VN_FAMILY_NAMES.add("lưu"); + VN_FAMILY_NAMES.add("nguyên"); + VN_FAMILY_NAMES.add("diêu"); + VN_FAMILY_NAMES.add("vĩnh"); + VN_FAMILY_NAMES.add("ngô"); + VN_FAMILY_NAMES.add("công"); + VN_FAMILY_NAMES.add("giang"); + VN_FAMILY_NAMES.add("đào"); + VN_FAMILY_NAMES.add("bùi"); + VN_FAMILY_NAMES.add("hông"); + VN_FAMILY_NAMES.add("ngọc"); + VN_FAMILY_NAMES.add("chi"); + VN_FAMILY_NAMES.add("bưu"); + VN_FAMILY_NAMES.add("tạ"); + VN_FAMILY_NAMES.add("nguyễn"); + VN_FAMILY_NAMES.add("văn"); + VN_FAMILY_NAMES.add("qui"); + VN_FAMILY_NAMES.add("hồng"); + VN_FAMILY_NAMES.add("quy"); 
+ VN_FAMILY_NAMES.add("từ"); + VN_FAMILY_NAMES.add("trân"); + VN_FAMILY_NAMES.add("hường"); + VN_FAMILY_NAMES.add("tô"); + VN_FAMILY_NAMES.add("mạc"); + VN_FAMILY_NAMES.add("bửu"); + VN_FAMILY_NAMES.add("đặng"); + VN_FAMILY_NAMES.add("huyên"); + VN_FAMILY_NAMES.add("lâm"); + VN_FAMILY_NAMES.add("võ"); + VN_FAMILY_NAMES.add("đinh"); + VN_FAMILY_NAMES.add("miên"); + VN_FAMILY_NAMES.add("mai"); + VN_FAMILY_NAMES.add("hương"); + VN_FAMILY_NAMES.add("lương"); + VN_FAMILY_NAMES.add("hồ"); + VN_FAMILY_NAMES.add("tôn"); + VN_FAMILY_NAMES.add("ưng"); + VN_FAMILY_NAMES.add("la"); + VN_FAMILY_NAMES.add("thân"); + VN_FAMILY_NAMES.add("hà"); + VN_FAMILY_NAMES.add("dương"); + VN_FAMILY_NAMES.add("trịnh"); + VN_FAMILY_NAMES.add("tằng"); + VN_FAMILY_NAMES.add("lan"); + VN_FAMILY_NAMES.add("doãn"); + VN_FAMILY_NAMES.add("vinh"); + VN_FAMILY_NAMES.add("trần"); + VN_FAMILY_NAMES.add("huỳnh"); + VN_FAMILY_NAMES.add("vương"); + VN_FAMILY_NAMES.add("vũ"); + VN_FAMILY_NAMES.add("cao"); + VN_FAMILY_NAMES.add("phạm"); + VN_FAMILY_NAMES.add("hoàng"); + VN_FAMILY_NAMES.add("đỗ"); + VN_FAMILY_NAMES.add("trương"); + VN_FAMILY_NAMES.add("đoàn"); + VN_FAMILY_NAMES.add("diệp"); + VN_FAMILY_NAMES.add("lê"); + } +} \ No newline at end of file diff --git a/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java new file mode 100644 index 0000000000000000000000000000000000000000..379c86be55e21aa1ac83c41f0eb191bc225555ce --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java @@ -0,0 +1,245 @@ +package vn.corenlp.wordsegmenter; + +import org.apache.log4j.Logger; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * @author DatQuocNguyen + */ +public class WordSegmenter { + private Node root; + private static WordSegmenter wordSegmenter = null; + public final static Logger LOGGER = Logger.getLogger(WordSegmenter.class); + public WordSegmenter() + throws IOException { + LOGGER.info("Loading Word Segmentation model"); + String modelPath = vn.pipeline.Utils.jarDir + "/models/wordsegmenter/wordsegmenter.rdr"; + if (!new File(modelPath).exists()) + throw new IOException("WordSegmenter: " + modelPath + " is not found!"); + + this.constructTreeFromRulesFile(modelPath); + } + + public static WordSegmenter initialize() throws IOException { + if(wordSegmenter == null) { + wordSegmenter = new WordSegmenter(); + } + return wordSegmenter; + } + private void constructTreeFromRulesFile(String rulesFilePath) + throws IOException { + BufferedReader buffer = new BufferedReader( + new InputStreamReader(new FileInputStream(new File(rulesFilePath)), "UTF-8")); + String line = buffer.readLine(); + + this.root = new Node(new FWObject(false), "NN", null, null, null, 0); + + Node currentNode = this.root; + int currentDepth = 0; + + for (; (line = buffer.readLine()) != null; ) { + int depth = 0; + for (int i = 0; i <= 6; i++) { // Supposed that the maximum + // exception level is up to 6. 
+ if (line.charAt(i) == '\t') + depth += 1; + else + break; + } + + line = line.trim(); + if (line.length() == 0) + continue; + + if (line.contains("cc:")) + continue; + + FWObject condition = Utils.getCondition(line.split(" : ")[0].trim()); + String conclusion = Utils.getConcreteValue(line.split(" : ")[1].trim()); + + Node node = new Node(condition, conclusion, null, null, null, depth); + + if (depth > currentDepth) { + currentNode.setExceptNode(node); + } else if (depth == currentDepth) { + currentNode.setIfnotNode(node); + } else { + while (currentNode.getDepth() != depth) + currentNode = currentNode.getFatherNode(); + currentNode.setIfnotNode(node); + } + node.setFatherNode(currentNode); + + currentNode = node; + currentDepth = depth; + } + buffer.close(); + } + + private Node findFiredNode(FWObject object) { + Node currentN = root; + Node firedN = null; + while (true) { + if (currentN.satisfy(object)) { + firedN = currentN; + if (currentN.getExceptNode() == null) { + break; + } else { + currentN = currentN.getExceptNode(); + } + } else { + if (currentN.getIfnotNode() == null) { + break; + } else { + currentN = currentN.getIfnotNode(); + } + } + + } + + return firedN; + } + + private List getInitialSegmentation(String sentence) + { + List wordtags = new ArrayList<>(); + + for (String regex : Utils.NORMALIZER_KEYS) + if (sentence.contains(regex)) + sentence = sentence.replaceAll(regex, Utils.NORMALIZER.get(regex)); + + List tokens = Arrays.asList(sentence.split("\\s+")); + List lowerTokens = Arrays.asList(sentence.toLowerCase().split("\\s+")); + + int senLength = tokens.size(); + int i = 0; + while (i < senLength) { + String token = tokens.get(i); + if (token.chars().allMatch(Character::isLetter)) { + + if (Character.isLowerCase(token.charAt(0)) && (i + 1) < senLength) { + if (Character.isUpperCase(tokens.get(i + 1).charAt(0))) { + wordtags.add(new WordTag(token, "B")); + i++; + continue; + } + } + + boolean isSingleSyllabel = true; + for (int j = Math.min(i + 4, senLength); j > i + 1; j--) { + String word = String.join(" ", lowerTokens.subList(i, j)); + if (Vocabulary.VN_DICT.contains(word) + || Vocabulary.VN_LOCATIONS.contains(word) || Vocabulary.COUNTRY_L_NAME.contains(word)) { + + wordtags.add(new WordTag(token, "B")); + for (int k = i + 1; k < j; k++) + wordtags.add(new WordTag(tokens.get(k), "I")); + + i = j - 1; + + isSingleSyllabel = false; + break; + } + } + if (isSingleSyllabel) { + String lowercasedToken = lowerTokens.get(i); + + if (Vocabulary.VN_FIRST_SENT_WORDS.contains(lowercasedToken) + || Character.isLowerCase(token.charAt(0)) + || token.chars().allMatch(Character::isUpperCase) + || Vocabulary.COUNTRY_S_NAME.contains(lowercasedToken) + || Vocabulary.WORLD_COMPANY.contains(lowercasedToken)) { + + wordtags.add(new WordTag(token, "B")); + i++; + continue; + + } + + // Capitalized + int ilower = i + 1; + for (ilower = i + 1; ilower < Math.min(i + 4, senLength); ilower++) { + String ntoken = tokens.get(ilower); + if (Character.isLowerCase(ntoken.charAt(0)) + || !ntoken.chars().allMatch(Character::isLetter) + || ntoken.equals("LBKT") || ntoken.equals("RBKT")) { + break; + } + } + + if (ilower > i + 1) { + boolean isNotMiddleName = true; + if (Vocabulary.VN_MIDDLE_NAMES.contains(lowercasedToken) && (i >= 1)) { + String prevT = tokens.get(i - 1); + if (Character.isUpperCase(prevT.charAt(0))) { + if (Vocabulary.VN_FAMILY_NAMES.contains(prevT.toLowerCase())) { + wordtags.add(new WordTag(token, "I")); + isNotMiddleName = false; + } + } + } + if (isNotMiddleName) + wordtags.add(new 
WordTag(token, "B")); + for (int k = i + 1; k < ilower; k++) + wordtags.add(new WordTag(tokens.get(k), "I")); + + i = ilower - 1; + } + else { + wordtags.add(new WordTag(token, "B")); + } + } + } + else { + wordtags.add(new WordTag(token, "B")); + } + + i++; + } + + return wordtags; + + } + + public String segmentTokenizedString(String str) + throws IOException { + StringBuilder sb = new StringBuilder(); + + String line = str.trim(); + if (line.length() == 0) { + return "\n"; + } + + List wordtags = this.getInitialSegmentation(line); + + int size = wordtags.size(); + for (int i = 0; i < size; i++) { + FWObject object = Utils.getObject(wordtags, size, i); + Node firedNode = findFiredNode(object); + if (firedNode.getDepth() > 0) { + if (firedNode.getConclusion().equals("B")) + sb.append(" " + wordtags.get(i).form); + else + sb.append("_" + wordtags.get(i).form); + } + else {// Fired at root, return initialized tag + if (wordtags.get(i).tag.equals("B")) + sb.append(" " + wordtags.get(i).form); + else + sb.append("_" + wordtags.get(i).form); + } + } + return sb.toString().trim(); + } + +} + diff --git a/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/WordTag.java b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/WordTag.java new file mode 100644 index 0000000000000000000000000000000000000000..df10fc239e3a64f3c844aff7a74dcc4cfc729f26 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/corenlp/wordsegmenter/WordTag.java @@ -0,0 +1,13 @@ +package vn.corenlp.wordsegmenter; + +public class WordTag { + public String word; + public String tag; + public String form; + + public WordTag(String iword, String itag) { + form = iword; + word = iword.toLowerCase(); + tag = itag; + } +} \ No newline at end of file diff --git a/VnCoreNLP/src/main/java/vn/pipeline/Annotation.java b/VnCoreNLP/src/main/java/vn/pipeline/Annotation.java new file mode 100644 index 0000000000000000000000000000000000000000..fd0d0f78787601bfcacf52cd07e7fdee93c88d48 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/pipeline/Annotation.java @@ -0,0 +1,147 @@ +package vn.pipeline; + +import java.io.*; +import java.util.*; + +public class Annotation { + private String rawText; + private List tokens; + private String wordSegmentedText; + private List words; + private List sentences; + + public Annotation(String rawText) { + this.rawText = rawText.trim(); + this.tokens = new ArrayList<>(); + this.wordSegmentedText = ""; + this.words = new ArrayList<>(); + } + + public String detectLanguage() { + try { + return Utils.detectLanguage(rawText); + } catch (IOException e) { + System.err.println("Cannot detect language!"); + } + // Can't detect language + return "N/A"; + } + + public static boolean isAlphabetic(String str) { + char[] chars = str.toCharArray(); + + for (char c : chars) { + if (!Character.isLetter(c)) { + return false; + } + } + + return true; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + if(sentences != null) + for(Sentence sentence : sentences) { + sb.append(sentence.toString() + "\n\n"); + } + else return rawText; + return sb.toString(); + } + + // Word count + public HashMap wordCount() { + HashMap output = new HashMap<>(); + for (Word np : words) { + String w = np.getForm(); + if (!output.containsKey(w)) output.put(w, 1); + else output.put(w, output.get(w) + 1); + } + return output; + } + + public LinkedHashMap ngrams(int n, boolean isWordLevel) { + if (!isWordLevel) return ngramAtCharacterLevel(n); + return ngramAtWordLevel(n); + } + + private LinkedHashMap ngramAtCharacterLevel(int n) { + 
LinkedHashMap output = new LinkedHashMap<>(); + for (int i = 0; i < this.rawText.length(); i++) { + StringBuffer sb = new StringBuffer(); + if (i + n <= this.rawText.length()) { + for (int j = i; j < i + n; j++) + sb.append(this.rawText.charAt(j)); + String ngram = sb.toString(); + if (!output.containsKey(ngram)) output.put(ngram, 1); + else output.put(ngram, output.get(ngram) + 1); + } + } + return output; + } + + private LinkedHashMap ngramAtWordLevel(int n) { + LinkedHashMap output = new LinkedHashMap<>(); + for (int i = 0; i < this.tokens.size(); i++) { + StringBuffer sb = new StringBuffer(); + if (i + n <= this.tokens.size()) { + for (int j = i; j < i + n; j++) + sb.append(this.tokens.get(j) + " "); + String ngram = sb.toString(); + if (!output.containsKey(ngram)) output.put(ngram, 1); + else output.put(ngram, output.get(ngram) + 1); + } + } + return output; + } + + public String getRawText() { + return rawText; + } + + public List getSentences() { + return sentences; + } + + public List getTokens() { + return tokens; + } + + public String getWordSegmentedText() { + return wordSegmentedText; + } + + + public String getWordSegmentedTaggedText() { + StringBuffer wordSegmentedTaggedText = new StringBuffer(); + for(Sentence sentence : sentences) { + wordSegmentedTaggedText.append(sentence.getWordSegmentedTaggedSentence() + " "); + } + return wordSegmentedTaggedText.toString().trim(); + } + + public List getWords() { + return words; + } + + public void setRawText(String rawText) { + this.rawText = rawText; + } + + public void setTokens(List tokens) { + this.tokens = tokens; + } + + public void setWordSegmentedText(String wordSegmentedText) { + this.wordSegmentedText = wordSegmentedText; + } + + public void setWords(List words) { + this.words = words; + } + + public void setSentences(List sentences) { + this.sentences = sentences; + } +} diff --git a/VnCoreNLP/src/main/java/vn/pipeline/LexicalInitializer.java b/VnCoreNLP/src/main/java/vn/pipeline/LexicalInitializer.java new file mode 100644 index 0000000000000000000000000000000000000000..0f8f7535ca540801d1117ad401839a715caa2098 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/pipeline/LexicalInitializer.java @@ -0,0 +1,82 @@ +package vn.pipeline; + +import edu.emory.mathcs.nlp.component.template.lexicon.GlobalLexica; + +import org.apache.log4j.Logger; +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.logging.Handler; +import java.util.logging.Level; + +public class LexicalInitializer { + private static LexicalInitializer lexicalInitializer; + private HashMap lexicalMap ; + private boolean initLexica = false; + private GlobalLexica globalLexica; + + public final static Logger LOGGER = Logger.getLogger(LexicalInitializer.class); + + public LexicalInitializer(boolean initLexica) throws IOException { + + this.initLexica = initLexica; + this.lexicalMap = new HashMap<>(); + + String lexicalPath = Utils.jarDir + "/models/ner/vi-500brownclusters.xz"; + if (!new File(lexicalPath).exists()) + throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!"); + lexicalMap.put("word_clusters", lexicalPath); + + lexicalPath = Utils.jarDir + "/models/ner/vi-pretrainedembeddings.xz"; + if (!new File(lexicalPath).exists()) + throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!"); + lexicalMap.put("word_embeddings", 
lexicalPath); + } + + public static LexicalInitializer initialize(boolean initLexica) throws IOException { + if (lexicalInitializer == null) { + lexicalInitializer = new LexicalInitializer(initLexica); + lexicalInitializer.initializeLexica(); + } + return lexicalInitializer; + } + + public GlobalLexica initializeLexica() { + if (globalLexica == null && initLexica) + try { + + DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance(); + DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); + Document xmlDoc = docBuilder.newDocument(); + Element root = xmlDoc.createElement("root"); + Element lexicals = xmlDoc.createElement("lexica"); + for(String lexicalName : lexicalMap.keySet()) { + Element lexical = xmlDoc.createElement(lexicalName); + lexical.setAttribute("field", "word_form_lowercase"); + if(!new File(lexicalMap.get(lexicalName)).exists()) return null; + lexical.appendChild(xmlDoc.createTextNode(lexicalMap.get(lexicalName))); + lexicals.appendChild(lexical); + } + root.appendChild(lexicals); + + java.util.logging.Logger globalLogger = java.util.logging.Logger.getLogger(""); + globalLogger.setLevel(Level.OFF); + Handler[] handlers = globalLogger.getHandlers(); + for(Handler handler : handlers) { + globalLogger.removeHandler(handler); + } + + globalLexica = new GlobalLexica<>(root); + } catch (Exception e) { + e.printStackTrace(); + } + return globalLexica; + } + + +} diff --git a/VnCoreNLP/src/main/java/vn/pipeline/Sentence.java b/VnCoreNLP/src/main/java/vn/pipeline/Sentence.java new file mode 100644 index 0000000000000000000000000000000000000000..46d27fddc2322443798f7e05e22626538aac016f --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/pipeline/Sentence.java @@ -0,0 +1,110 @@ +package vn.pipeline; + +import vn.corenlp.ner.NerRecognizer; +import vn.corenlp.parser.DependencyParser; +import vn.corenlp.postagger.PosTagger; +import vn.corenlp.wordsegmenter.WordSegmenter; +import vn.corenlp.tokenizer.Tokenizer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class Sentence { + private String rawSentence; + private List tokens; + private String wordSegmentedSentence; + + private List words; + + private WordSegmenter wordSegmenter ; + private PosTagger posTagger; + private NerRecognizer nerRecognizer; + private DependencyParser dependencyParser; + + public Sentence(String rawSentence, WordSegmenter wordSegmenter, PosTagger tagger, NerRecognizer nerRecognizer, DependencyParser dependencyParser) throws IOException { + this.posTagger = tagger; + this.nerRecognizer = nerRecognizer; + this.dependencyParser = dependencyParser; + this.wordSegmenter = wordSegmenter; + init(rawSentence.trim()); + } + + + public String detectLanguage() { + try { + return Utils.detectLanguage(rawSentence); + } catch (IOException e) { + System.err.println("Cannot detect language!"); + } + // Can't detect language + return "N/A"; + } + + private void init(String rawSentence) throws IOException { + this.rawSentence = rawSentence; + this.tokens = Tokenizer.tokenize(this.rawSentence); + + if(this.wordSegmenter != null) { + this.wordSegmentedSentence = this.wordSegmenter.segmentTokenizedString(this.rawSentence); + } + else this.wordSegmentedSentence = String.join(" ", this.tokens); + + this.createWords(); + + } + + private void createWords() throws IOException { + + if (this.posTagger != null) + this.words = posTagger.tagSentence(this.wordSegmentedSentence); + else { + this.words = new ArrayList<>(); + String[] segmentedTokens = this.wordSegmentedSentence.split(" "); + for 
(int i = 0; i < segmentedTokens.length; i++) { + Word word = new Word((i+1), segmentedTokens[i]); + this.words.add(word); + } + } + + if (this.nerRecognizer != null) + this.nerRecognizer.tagSentence(this.words); + if (this.dependencyParser != null) + this.dependencyParser.tagSentence(this.words); + + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + for (Word word : words) { + sb.append(word.toString() + "\n"); + } + return sb.toString().trim(); + } + + public String getRawSentence() { + return rawSentence; + } + + public List getTokens() { + return tokens; + } + + public String getWordSegmentedSentence() { + return wordSegmentedSentence; + } + + public List getWords() { + return words; + } + + public String getWordSegmentedTaggedSentence() { + StringBuffer wordSegmentedTaggedSentence = new StringBuffer(); + for(Word word : this.words) { + wordSegmentedTaggedSentence.append(word.toString() + " "); + } + return wordSegmentedTaggedSentence.toString().trim(); + } + +} diff --git a/VnCoreNLP/src/main/java/vn/pipeline/Utils.java b/VnCoreNLP/src/main/java/vn/pipeline/Utils.java new file mode 100644 index 0000000000000000000000000000000000000000..c4c8f04befe3dadb531491ea4380e72d88b1e364 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/pipeline/Utils.java @@ -0,0 +1,31 @@ +package vn.pipeline; + +import com.optimaize.langdetect.DetectedLanguage; +import com.optimaize.langdetect.LanguageDetector; +import com.optimaize.langdetect.LanguageDetectorBuilder; +import com.optimaize.langdetect.ngram.NgramExtractors; +import com.optimaize.langdetect.profiles.LanguageProfileReader; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +public class Utils { + private static File jarFile = new File(VnCoreNLP.class.getProtectionDomain().getCodeSource().getLocation().getPath()); + public static String jarDir = jarFile.getParentFile().getPath(); + + private static LanguageDetector languageDetector = null; + public static String detectLanguage(String text) throws IOException{ + if(languageDetector == null) { + languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .shortTextAlgorithm(0) + .withProfiles(new LanguageProfileReader().readAllBuiltIn()) + .build(); + } + List detectedLanguages = languageDetector.getProbabilities(text); + if(detectedLanguages.size() > 0) + return detectedLanguages.get(0).getLocale().getLanguage(); + return "N/A"; + } + +} diff --git a/VnCoreNLP/src/main/java/vn/pipeline/VnCoreNLP.java b/VnCoreNLP/src/main/java/vn/pipeline/VnCoreNLP.java new file mode 100644 index 0000000000000000000000000000000000000000..c965d5839398876fcc3b9d310905e345f3b617e9 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/pipeline/VnCoreNLP.java @@ -0,0 +1,134 @@ +package vn.pipeline; + +import org.apache.log4j.Logger; +import vn.corenlp.ner.NerRecognizer; +import vn.corenlp.parser.DependencyParser; +import vn.corenlp.postagger.PosTagger; +import vn.corenlp.tokenizer.Tokenizer; +import vn.corenlp.wordsegmenter.WordSegmenter; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; + + +public class VnCoreNLP { + + public final static Logger LOGGER = Logger.getLogger(Annotation.class); + + private WordSegmenter wordSegmenter; + private PosTagger posTagger; + private NerRecognizer nerRecognizer; + private DependencyParser dependencyParser; + + public VnCoreNLP() throws IOException { + String[] annotators = {"wseg", "pos", "ner", "parse"}; + initAnnotators(annotators); + } + + public VnCoreNLP(String[] annotators) throws 
IOException { + initAnnotators(annotators); + + } + + public void initAnnotators(String[] annotators) throws IOException{ + for(String annotator : annotators) { + switch (annotator.trim()) { + case "parse": + this.dependencyParser = DependencyParser.initialize(); + break; + case "ner": + this.nerRecognizer = NerRecognizer.initialize(); + break; + case "pos": + this.posTagger = PosTagger.initialize(); + break; + case "wseg": + this.wordSegmenter = WordSegmenter.initialize(); + break; + } + } + + } + + public void printToFile(Annotation annotation, PrintStream printer) throws IOException { + for(Sentence sentence : annotation.getSentences()) { + printer.println(sentence.toString()); + } + } + + public void printToFile(Annotation annotation, String fileOut) throws IOException { + PrintStream printer = new PrintStream(fileOut, "UTF-8"); + for(Sentence sentence : annotation.getSentences()) { + printer.println(sentence.toString() + "\n"); + } + } + + public void annotate(Annotation annotation) throws IOException { + List rawSentences = Tokenizer.joinSentences(Tokenizer.tokenize(annotation.getRawText())); + annotation.setSentences(new ArrayList<>()); + for (String rawSentence : rawSentences) { + if (rawSentence.trim().length() > 0) { + Sentence sentence = new Sentence(rawSentence, wordSegmenter, posTagger, nerRecognizer, dependencyParser); + annotation.getSentences().add(sentence); + annotation.getTokens().addAll(sentence.getTokens()); + annotation.getWords().addAll(sentence.getWords()); + annotation.setWordSegmentedText(annotation.getWordSegmentedTaggedText() + sentence.getWordSegmentedSentence() + " "); + } + + } + + annotation.setWordSegmentedText(annotation.getWordSegmentedTaggedText().trim()); + + } + + public static void printUsage() { + System.out.println("Usage: \n\t-fin inputFile (required)\n\t-fout outputFile (optional, default: inputFile.out)\n" + + "\t-annotators functionNames (optional, default: wseg,pos,ner,parse)" + + "\nExample 1: -fin sample_input.txt -fout output.txt" + + "\nExample 2: -fin sample_input.txt -fout output.txt -annotators wseg,pos,ner"); + } + + public static void processPipeline(String fileIn, String fileOut, String[] annotators) throws IOException{ + + FileInputStream fis = new FileInputStream(new File(fileIn)); + InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File(fileOut)), "UTF-8"); + + BufferedReader br = new BufferedReader(isr); + VnCoreNLP pipeline = new VnCoreNLP(annotators); + LOGGER.info("Start processing " + fileIn); + while(br.ready()) { + String line = br.readLine(); + if (line.trim().length() > 0) { + Annotation annotation = new Annotation(line); + pipeline.annotate(annotation); + osw.write(annotation.toString()); + } + } + br.close(); + isr.close(); + fis.close(); + osw.close(); + LOGGER.info("Wrote output to " + fileOut); + } + + public static void main(String[] args) throws IOException { + String fileIn = null, fileOut = null; + String[] annotators = {"wseg", "pos", "ner", "parse"}; + for(int i = 0; i < args.length; i++) { + if (args[i].equals("-fin") && i + 1 < args.length) fileIn = args[i+1]; + else if (args[i].equals("-fout") && i + 1 < args.length) fileOut = args[i+1]; + else if (args[i].equals("-annotators") && i + 1 < args.length) annotators = args[i+1].split(","); + } + + if (fileIn == null) { + printUsage(); + return; + } + + if (fileOut == null) fileOut = fileIn + ".out"; + processPipeline(fileIn, fileOut, annotators); + } + +} diff --git 
a/VnCoreNLP/src/main/java/vn/pipeline/Word.java b/VnCoreNLP/src/main/java/vn/pipeline/Word.java new file mode 100644 index 0000000000000000000000000000000000000000..ece727ef8dd25c0f2d416071c88d6cb9cbdf03f3 --- /dev/null +++ b/VnCoreNLP/src/main/java/vn/pipeline/Word.java @@ -0,0 +1,111 @@ +package vn.pipeline; + +public class Word { + private int index = -1; + private String form; + private String posTag; + private String nerLabel; + private int head = -1; + private String depLabel; + + public Word(Word word) { + this.index = word.index; + this.form = word.form; + this.posTag = word.posTag; + this.nerLabel = word.nerLabel; + this.head = word.head; + this.depLabel = word.depLabel; + } + public Word(int index, String form, String posTag) { + this.index = index; + this.form = form; + this.posTag = posTag; + } + + public Word(int index, String form) { + this.index = index; + this.form = form; + } + + public Word(int index, String form, String posTag, String nerLabel) { + this.index = index; + this.form = form; + this.posTag = posTag; + this.nerLabel = nerLabel; + } + + public Word(int index, String form, String posTag, String nerLabel, String chunkingLabel) { + this.index = index; + this.form = form; + this.posTag = posTag; + this.nerLabel = nerLabel; + } + + public Word(int index, String form, String posTag, String nerLabel, int head, String depLabel, String chunkingLabel) { + this.index = index; + this.form = form; + this.posTag = posTag; + this.nerLabel = nerLabel; + this.head = head; + this.depLabel = depLabel; + } + + @Override + public String toString() { + return this.getIndex() + "\t" + + this.getForm() + "\t" + + (this.getPosTag() == null?"_": this.getPosTag()) + "\t" + + (this.getNerLabel() == null?"_": this.getNerLabel()) + "\t" + + (this.getHead() == -1?"_\t": this.getHead()) + "\t" + + (this.getDepLabel() == null?"_": this.getDepLabel()); + } + + public String getForm() { + return form; + } + + public void setForm(String form) { + this.form = form; + } + + public String getPosTag() { + return posTag; + } + + public void setPosTag(String pos) { + this.posTag = pos; + } + + public String getNerLabel() { + return nerLabel; + } + + public void setNerLabel(String nerLabel) { + this.nerLabel = nerLabel; + } + + public int getIndex() { + return index; + } + + public void setIndex(int index) { + this.index = index; + } + + public int getHead() { + return head; + } + + public void setHead(int head) { + this.head = head; + } + + public String getDepLabel() { + return depLabel; + } + + public void setDepLabel(String depLabel) { + this.depLabel = depLabel; + } + +} diff --git a/VnCoreNLP/src/main/resources/log4j.properties b/VnCoreNLP/src/main/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..6727ef4644df766404413d48349332cdcc23120d --- /dev/null +++ b/VnCoreNLP/src/main/resources/log4j.properties @@ -0,0 +1,10 @@ +# Root logger option + +log4j.rootLogger=INFO, stdout +log4j.logger.edu.emory.mathcs.nlp=OFF + +# Direct log messages to stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n diff --git a/VnCoreNLP/src/test/java/VnCoreNLPExample.java b/VnCoreNLP/src/test/java/VnCoreNLPExample.java new file mode 100644 index 0000000000000000000000000000000000000000..b3e0ce7ea2afe3098f50e36b0a4fe992cbcf6ef8 --- /dev/null +++ 
b/VnCoreNLP/src/test/java/VnCoreNLPExample.java @@ -0,0 +1,30 @@ +import vn.pipeline.*; +import java.io.*; +public class VnCoreNLPExample { + public static void main(String[] args) throws IOException { + + // "wseg", "pos", "ner", and "parse" refer to as word segmentation, POS tagging, NER and dependency parsing, respectively. + String[] annotators = {"wseg", "pos", "ner", "parse"}; + VnCoreNLP pipeline = new VnCoreNLP(annotators); + + String str = "Ông Nguyễn Khắc Chúc đang làm việc tại Đại học Quốc gia Hà Nội. " + + "Bà Lan, vợ ông Chúc, cũng làm việc tại đây."; + Annotation annotation = new Annotation(str); + pipeline.annotate(annotation); + + System.out.println(annotation.toString()); + // 1 Ông Nc O 4 sub + // 2 Nguyễn_Khắc_Chúc Np B-PER 1 nmod + // 3 đang R O 4 adv + // 4 làm_việc V O 0 root + // ... + + //Write to file + PrintStream outputPrinter = new PrintStream("output.txt"); + pipeline.printToFile(annotation, outputPrinter); + + // You can also get a single sentence to analyze individually + Sentence firstSentence = annotation.getSentences().get(0); + System.out.println(firstSentence.toString()); + } +} \ No newline at end of file diff --git a/__pycache__/app.cpython-38.pyc b/__pycache__/app.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5174ca3fdee09bb00d9d2cb09620a1cea39ceb7b Binary files /dev/null and b/__pycache__/app.cpython-38.pyc differ diff --git a/api/__pycache__/tracker.cpython-38.pyc b/api/__pycache__/tracker.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29a8ea595125add33e60f039df50434bd9a78790 Binary files /dev/null and b/api/__pycache__/tracker.cpython-38.pyc differ diff --git a/api/detector.py b/api/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..8d17009c6983bc16f60be3f9e79dc59406dd5fdf --- /dev/null +++ b/api/detector.py @@ -0,0 +1,37 @@ +import sys +sys.path.append("./") +import uvicorn +import shutil + +from fastapi import FastAPI, File, UploadFile, Request +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates +from fastapi.staticfiles import StaticFiles +from src.ss.signboard_detect import SignBoardDetector + +signboardDetector = SignBoardDetector(checkpoint="./checkpoints/ss/ss.ckpt") + +app = FastAPI() +app.mount("/static", StaticFiles(directory="static"), name="static") +templates = Jinja2Templates(directory="templates") + +@app.get("/upload", response_class=HTMLResponse) +async def upload(request: Request): + return templates.TemplateResponse("detector.html", {"request": request}) + +@app.post("/uploadfile") +async def create_upload_file(file: UploadFile = File(...)): + if file.content_type.split("/")[0] != "image": + return {"file_name": file.filename, + "file_type": file.content_type, + "warning": "Need a image file instead."} + else: + with open(f"./static/images/{file.filename}", "wb") as buffer: + shutil.copyfileobj(file.file, buffer) + results = signboardDetector.inference_signboard(image=[f"./static/images/{file.filename}"], score=0.7) + file.file.close() + return {"results": results} + +if __name__ == "__main__": + # main() + uvicorn.run("api.detector:app", reload=True, port=8001) diff --git a/api/ir.py b/api/ir.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/api/tracker.py b/api/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..959de429749b0ec0bd5f6ca5c9d385d96d2b3af3 --- /dev/null +++ 
b/api/tracker.py @@ -0,0 +1,44 @@ +import sys +sys.path.append("./") +import os +import uvicorn +import shutil + +from fastapi import FastAPI, File, UploadFile, Request +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates +from fastapi.staticfiles import StaticFiles +from src.tracker.signboard_track import SignboardTracker + +signboardTracker = SignboardTracker() + +app = FastAPI() +app.mount("/static", StaticFiles(directory="static"), name="static") +templates = Jinja2Templates(directory="templates") + +@app.get("/upload", response_class=HTMLResponse) +async def upload(request: Request): + return templates.TemplateResponse("tracker.html", {"request": request}) + +@app.post("/uploadfile") +async def create_upload_file(file: UploadFile = File(...)): + if file.content_type.split("/")[0] != "video": + return {"file_name": file.filename, + "file_type": file.content_type, + "warning": "Need a video file instead."} + else: + output_folder = ('.').join(file.filename.split('.')[:-1]) + if os.path.exists("./static/videos/" + output_folder): + shutil.rmtree("./static/videos/" + output_folder) + os.mkdir("./static/videos/" + output_folder) + os.mkdir("./static/videos/" + output_folder + "/frames") + with open(f"./static/videos/{output_folder}/{file.filename}", "wb") as buffer: + shutil.copyfileobj(file.file, buffer) + output = f"./static/videos/{output_folder}/{('.').join(file.filename.split('.')[:-1])}_traked.{file.filename.split('.')[-1]}" + output_format = "mp4v" + results = signboardTracker.inference_signboard(f"./static/videos/{output_folder}/{file.filename}", output, output_format, "./static/videos/" + output_folder + "/frames") + return {"results": results} + +if __name__ == "__main__": + # main() + uvicorn.run("api.tracker:app", reload=True, port=8000) diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..3d50604c43b43cadf3bd99c577217aeac1df4416 --- /dev/null +++ b/app.py @@ -0,0 +1,291 @@ +import numpy as np +import gradio as gr +import cv2 +import os +import shutil +import re +import torch +import csv +import time + +from src.sts.demo.sts import handle_sts +from src.ir.ir import handle_ir +from src.ir.src.models.tc_classifier import TCClassifier +from src.tracker.signboard_track import SignboardTracker + +from omegaconf import DictConfig +from hydra import compose, initialize + +signboardTracker = SignboardTracker() + +tracking_result_dir = "" +output_track_format = "mp4v" +output_track = "" +output_sts = "" +video_dir = "" +vd_dir = "" +labeling_dir = "" + +frame_out = {} +rs = {} +results = [] + +# with initialize(version_base=None, config_path="src/ir/configs", job_name="ir"): +# config = compose(config_name="test") +# config: DictConfig +# model_ir = TCClassifier(config.model.train.model_name, +# config.model.train.n_classes, +# config.model.train.lr, +# config.model.train.scheduler_type, +# config.model.train.max_steps, +# config.model.train.weight_decay, +# config.model.train.classifier_dropout, +# config.model.train.mixout, +# config.model.train.freeze_encoder) +# model_ir = model_ir.load_from_checkpoint(checkpoint_path=config.ckpt_path, map_location=torch.device("cuda")) + +def create_dir(list_dir_path): + for dir_path in list_dir_path: + if not os.path.isdir(dir_path): + os.makedirs(dir_path) + +def get_meta_from_video(input_video): + if input_video is not None: + video_name = os.path.basename(input_video).split('.')[0] + + global video_dir + video_dir = os.path.join("static/videos/", 
f"{video_name}") + + global vd_dir + vd_dir = os.path.join(video_dir, os.path.basename(input_video)) + + global output_track + output_track = os.path.join(video_dir,"original") + + global tracking_result_dir + tracking_result_dir = os.path.join(video_dir,"track/cropped") + + global output_sts + output_sts = os.path.join(video_dir,"track/sts") + + global labeling_dir + labeling_dir = os.path.join(video_dir,"track/labeling") + + if os.path.isdir(video_dir): + return None + else: + create_dir([output_track, video_dir, os.path.join(video_dir, "track/segment"), output_sts, tracking_result_dir, labeling_dir]) + + # initialize the video stream + video_cap = cv2.VideoCapture(input_video) + # grab the width, height, and fps of the frames in the video stream. + frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(video_cap.get(cv2.CAP_PROP_FPS)) + + #tổng Fps + # total_frames = int(video_cap.get(cv2.CAP_PROP_FRAME_COUNT)) + # print(total_frames) + # # Tính tổng số giây trong video + # total_seconds = total_frames / video_cap.get(cv2.CAP_PROP_FPS) + # print(total_seconds) + + # initialize the FourCC and a video writer object + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + output = cv2.VideoWriter(vd_dir, fourcc, fps, (frame_width, frame_height)) + + while True: + success, frame = video_cap.read() + # write the frame to the output file + if success == True: + output.write(frame) + else: + break + # print(fps) + # return gr.Slider(1, fps, value=4, label="FPS",step=1, info="Choose between 1 and {fps}", interactive=True) + return gr.Textbox(value=fps) + +def get_signboard(evt: gr.SelectData): + name_fr = int(evt.index) + 1 + ids_dir = tracking_result_dir + all_ids = os.listdir(ids_dir) + gallery=[] + for i in all_ids: + fr_id = str(name_fr) + al = re.search("[\d]*_"+fr_id+".png", i) + if al: + id_dir = os.path.join(ids_dir, i) + gallery.append(id_dir) + gallery = sorted(gallery) + return gallery, name_fr + +def tracking(fps_target): + start = time.time() + fps_target = int(fps_target) + global results + results = signboardTracker.inference_signboard(fps_target, vd_dir, output_track, output_track_format, tracking_result_dir)[0] + # print("result", results) + fd = [] + global frame_out + list_id = [] + + with open(os.path.join(video_dir, "track/label.csv"), 'w', newline='') as file: + writer = csv.writer(file) + writer.writerow(["Signboard", "Frame", "Text"]) + + for frame, values in results.items(): + frame_dir = os.path.join(output_track, f"{frame}.jpg") + # segment = os.path.join(video_dir,"segment/" + f"{frame}.jpg") + list_boxs = [] + full = [] + list_id_tmp = [] + # print("values", values) + for value in values: + + list_boxs.append(value['box']) + list_id_tmp.append(value['id']) + _, dict_rec_sign_out = handle_sts(frame_dir, labeling_dir, list_boxs, list_id_tmp) + + # predicted = handle_ir(frame_dir, dict_rec_sign_out, os.path.join(video_dir, "ir")) + # print(predicted) + + # fd.append(frame_dir) + # frame_out[frame] = full + list_id.extend(list_id_tmp) + list_id = list(set(list_id)) + # print(list_id) + print(time.time()-start) + return gr.Dropdown(label="signboard",choices=list_id, interactive=True) + + +def get_select_index(img_id, evt: gr.SelectData): + ids_dir = tracking_result_dir + # print(ids_dir) + all_ids = os.listdir(ids_dir) + gallery = [] + for i in all_ids: + fr_id = str(img_id) + al = re.search("[\d]*_"+fr_id+".png", i) + if al: + id_dir = os.path.join(ids_dir, i) + gallery.append(id_dir) + gallery = 
sorted(gallery) + + gallery_id=[] + id_name = gallery[evt.index] + id = os.path.basename(id_name).split(".")[0].split("_")[0] + for i in all_ids: + al = re.search("^" +id + "_[\d]*.png", i) + if al: + id_dir = os.path.join(ids_dir, i) + gallery_id.append(id_dir) + gallery_id = sorted(gallery_id) + return gallery_id + +id_glb = None +def select_id(evt: gr.SelectData): + choice=[] + global id_glb + id_glb = evt.value + for key, values in results.items(): + for value in values: + if value['id'] == evt.value: + choice.append(int(key)) + return gr.Dropdown(label="frame", choices=choice, interactive=True) + + +import pandas as pd + +frame_glb = None +def select_frame(evt: gr.SelectData): + full_img = os.path.join(output_track, str(evt.value) + ".jpg") + crop_img = os.path.join(tracking_result_dir, str(id_glb) + "_" + str(evt.value) + ".png") + + global frame_glb + frame_glb = evt.value + data = pd.read_csv(os.path.join(labeling_dir, str(id_glb) + "_" + str(frame_glb) + '.csv'), header=0) + + return full_img, crop_img, data + +def get_data(dtfr): + print(dtfr) + + # df = pd.read_csv(os.path.join(video_dir, "track/label.csv")) + # for i, row in df.iterrows(): + # if str(row["Signboard"]) == str(id_tmp) and str(row["Frame"]) == str(frame_tmp): + # # print(row["Text"]) + # df_new = df.replace(str(row["Text"]), str(labeling)) + # print(df_new) + dtfr.to_csv(os.path.join(labeling_dir, str(id_glb) + "_" + str(frame_glb) + '.csv'), index=False, header=True) + return + +def seg_track_app(): + ########################################################## + ###################### Front-end ######################## + ########################################################## + with gr.Blocks(css=".gradio-container {background-color: white}") as demo: + gr.Markdown( + ''' +
+ POI Engineering +
+ ''' + ) + with gr.Row(): + # video input + with gr.Column(scale=0.2): + + tab_video_input = gr.Row(label="Video type input") + with tab_video_input: + input_video = gr.Video(label='Input video') + + tab_everything = gr.Row(label="Tracking") + with tab_everything: + with gr.Row(): + seg_signboard = gr.Button(value="Tracking", interactive=True) + all_info = gr.Row(label="Information about video") + with all_info: + with gr.Row(): + text = gr.Textbox(label="Fps") + check_fps = gr.Textbox(label="Choose fps for output", interactive=True) + + with gr.Column(scale=1): + with gr.Row(): + with gr.Column(scale=2): + with gr.Row(): + with gr.Column(scale=1): + id_drop = gr.Dropdown(label="Signboards",choices=[]) + with gr.Column(scale=1): + fr_drop = gr.Dropdown(label="Frames",choices=[]) + full_img = gr.Image(label="Full Image") + + with gr.Column(scale=1): + crop_img = gr.Image(label="Cropped Image") + with gr.Row(): + dtfr = gr.Dataframe(headers=["Tag", "Value"], datatype=["str", "str"], interactive=True) + with gr.Row(): + submit = gr.Button(value="Submit", interactive=True) + + ########################################################## + ###################### back-end ######################### + ########################################################## + input_video.change( + fn=get_meta_from_video, + inputs=input_video, + outputs=text + ) + seg_signboard.click( + fn=tracking, + inputs=check_fps, + outputs=id_drop + ) + + id_drop.select(select_id, None, fr_drop) + fr_drop.select(select_frame, None, [full_img,crop_img, dtfr]) + submit.click(get_data, dtfr, None) + + demo.queue(concurrency_count=1) + demo.launch(debug=True, enable_queue=True, share=True) + +if __name__ == "__main__": + seg_track_app() diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e7358eb208f6016838e8aa729e8eaad34fd9cb10 --- /dev/null +++ b/backend/__init__.py @@ -0,0 +1 @@ +from fastapi import FastAPI, Body \ No newline at end of file diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000000000000000000000000000000000000..513707623c9bcdf69d33b9fdbc14774b57c16f3d --- /dev/null +++ b/backend/main.py @@ -0,0 +1,4 @@ +import uvicorn + +if __name__ == "__main__": + uvicorn.run("server.app:app", host="0.0.0.0", port=5000, reload=True) \ No newline at end of file diff --git a/backend/server/__pycache__/app.cpython-38.pyc b/backend/server/__pycache__/app.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5309062c21cd62a24cbb4c9e7c73d6d0f1bfcf84 Binary files /dev/null and b/backend/server/__pycache__/app.cpython-38.pyc differ diff --git a/backend/server/__pycache__/database.cpython-38.pyc b/backend/server/__pycache__/database.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c2347e6d81e7e80e4ddc8e34f7de82a29aaceba Binary files /dev/null and b/backend/server/__pycache__/database.cpython-38.pyc differ diff --git a/backend/server/__pycache__/services.cpython-38.pyc b/backend/server/__pycache__/services.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0fff1b41b35d1615822081d623b6792999699672 Binary files /dev/null and b/backend/server/__pycache__/services.cpython-38.pyc differ diff --git a/backend/server/app.py b/backend/server/app.py new file mode 100644 index 0000000000000000000000000000000000000000..e9759ae1447926e52ed04b1e2a3c4cce1d6fab38 --- /dev/null +++ b/backend/server/app.py @@ -0,0 +1,19 @@ +from fastapi import FastAPI +from 
fastapi.middleware.cors import CORSMiddleware +from server.routes.user import router as UserRouter + +app = FastAPI() + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +app.include_router(UserRouter, tags=["User"], prefix="/user") + + +@app.get("/", tags=["Root"]) +async def read_root(): + return {"message": "Welcome to labeling app!"} \ No newline at end of file diff --git a/backend/server/database.py b/backend/server/database.py new file mode 100644 index 0000000000000000000000000000000000000000..c5540db0bc8fe9b1fdf2788bf104c41ca4c100a5 --- /dev/null +++ b/backend/server/database.py @@ -0,0 +1,65 @@ +import motor.motor_asyncio +from bson.objectid import ObjectId + +MONGO_DETAILS = "mongodb://localhost:27017" + +client = motor.motor_asyncio.AsyncIOMotorClient(MONGO_DETAILS) + +database = client.database + +user_collection = database.get_collection("users") + +# helpers +def user_helper(user) -> dict: + return { + "id": str(user["_id"]), + "username": user["username"], + "email": user["email"], + "password": user["password"], + "projectname": user["projectname"], + "projectpath": user["projectpath"], + } + +# Retrieve all users present in the database +async def retrieve_users(): + users = [] + async for user in user_collection.find(): + users.append(user_helper(user)) + return users + + +# Add a new user into to the database +async def add_user(user_data: dict) -> dict: + user = await user_collection.insert_one(user_data) + new_user = await user_collection.find_one({"_id": user.inserted_id}) + return user_helper(new_user) + + +# Retrieve a user with a matching ID +async def retrieve_user(username: str) -> dict: + user = await user_collection.find_one({"username":username}) + if user: + return user_helper(user) + + +# Update a user with a matching ID +async def update_user(username: str, data: dict): + # Return false if an empty request body is sent. + if len(data) < 1: + return False + user = await user_collection.find_one({"username": username}) + if user: + updated_user = await user_collection.update_one( + {"username": username}, {"$set": data} + ) + if updated_user: + return True + return False + + +# Delete a user from the database +async def delete_user(username: str): + user = await user_collection.find_one({"username": username}) + if user: + await user_collection.delete_one({"username": username}) + return True \ No newline at end of file diff --git a/backend/server/models/__pycache__/student.cpython-38.pyc b/backend/server/models/__pycache__/student.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..443b29b217cdb9b7ecfb4558b3b1417bd47e5625 Binary files /dev/null and b/backend/server/models/__pycache__/student.cpython-38.pyc differ diff --git a/backend/server/models/__pycache__/user.cpython-38.pyc b/backend/server/models/__pycache__/user.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c684e0801c38dad9277b651f13aeb83f12fc970 Binary files /dev/null and b/backend/server/models/__pycache__/user.cpython-38.pyc differ diff --git a/backend/server/models/user.py b/backend/server/models/user.py new file mode 100644 index 0000000000000000000000000000000000000000..45aaa05ac4d2892ec07f0571e72c4a093f98bc06 --- /dev/null +++ b/backend/server/models/user.py @@ -0,0 +1,53 @@ +from typing import Optional, List + +from pydantic import BaseModel, EmailStr, Field + + +class UserSchema(BaseModel): + username: str = Field(...) 
+ email: str = Field(...) + password: str = Field(...) + projectname: List[str] + projectpath: List[str] + + class Config: + schema_extra = { + "example": { + "username": "John Doe", + "email": "jdoe@x.edu.ng", + "password": "Water resources engineering", + "projectname": ["1", "2"], + "projectpath": ["1", "2"], + } + } + + +class UpdateUserModel(BaseModel): + username: Optional[str] + email: Optional[EmailStr] + password: Optional[str] + projectname: List[str] + projectpath: List[str] + + class Config: + schema_extra = { + "example": { + "username": "John Doe", + "email": "jdoe@x.edu.ng", + "password": "Water resources engineering", + "projectname": ["1", "2"], + "projectpath": ["1", "2"], + } + } + + +def ResponseModel(data, message): + return { + "data": [data], + "code": 200, + "message": message, + } + + +def ErrorResponseModel(error, code, message): + return {"error": error, "code": code, "message": message} \ No newline at end of file diff --git a/backend/server/routes/__pycache__/student.cpython-38.pyc b/backend/server/routes/__pycache__/student.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55e22794efe220e405649cac7134343d34ff0084 Binary files /dev/null and b/backend/server/routes/__pycache__/student.cpython-38.pyc differ diff --git a/backend/server/routes/__pycache__/user.cpython-38.pyc b/backend/server/routes/__pycache__/user.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..624485556af536ee9267f380f56bc294df488c26 Binary files /dev/null and b/backend/server/routes/__pycache__/user.cpython-38.pyc differ diff --git a/backend/server/routes/user.py b/backend/server/routes/user.py new file mode 100644 index 0000000000000000000000000000000000000000..46ea81767b7edcf11dde788234c1b02568222186 --- /dev/null +++ b/backend/server/routes/user.py @@ -0,0 +1,81 @@ +from fastapi import APIRouter, Body, Form +from fastapi.encoders import jsonable_encoder + +from typing_extensions import Annotated + +from server.services import login_user, get_project_user +from server.database import ( + add_user, + delete_user, + retrieve_user, + retrieve_users, + update_user, +) +from server.models.user import ( + ErrorResponseModel, + ResponseModel, + UserSchema, + UpdateUserModel, +) + +router = APIRouter() + +# CREATE +@router.post("/", response_description="User data added into the database") +async def add_user_data(user: UserSchema = Body(...)): + user = jsonable_encoder(user) + new_user = await add_user(user) + return ResponseModel(new_user, "User added successfully.") + +# READ +@router.get("/", response_description="Users retrieved") +async def get_users(): + users = await retrieve_users() + if users: + return ResponseModel(users, "Users data retrieved successfully") + return ResponseModel(users, "Empty list returned") + + +@router.get("/{username}", response_description="User data retrieved") +async def get_user_data(username): + user = await retrieve_user(username) + if user: + return ResponseModel(user, "User data retrieved successfully") + return ErrorResponseModel("An error occurred.", 404, "User doesn't exist.") + +# UPDATE +@router.put("/{username}") +async def update_user_data(username: str, req: UpdateUserModel = Body(...)): + req = {k: v for k, v in req.dict().items() if v is not None} + updated_user = await update_user(username, req) + if updated_user: + return ResponseModel( + "User with ID: {} name update is successful".format(id), + "User name updated successfully", + ) + return ErrorResponseModel( + "An error 
occurred", + 404, + "There was an error updating the user data.", + ) + +# DELETE +@router.delete("/{username}", response_description="User data deleted from the database") +async def delete_user_data(username: str): + deleted_user = await delete_user(username) + if deleted_user: + return ResponseModel( + "User with ID: {} removed".format(username), "User deleted successfully" + ) + return ErrorResponseModel( + "An error occurred", 404, "User with username {0} doesn't exist".format(username) + ) + +@router.post("/login", response_description="User login") +async def login(username: Annotated[str, Form()], password: Annotated[str, Form()]): + return login_user(username, password) + # return {"username": username} + +@router.post("/get_project", response_description="User login") +async def get_project(username: Annotated[str, Form()]): + return get_project_user(username) \ No newline at end of file diff --git a/backend/server/services.py b/backend/server/services.py new file mode 100644 index 0000000000000000000000000000000000000000..74a3a4781d392f54d9ec86341e7fed61425bca30 --- /dev/null +++ b/backend/server/services.py @@ -0,0 +1,32 @@ +import motor.motor_asyncio +from pymongo import MongoClient +from fastapi.responses import JSONResponse, HTMLResponse + +# Mongodb setting +client = MongoClient("mongodb://localhost:27017/") +db = client.get_database("database") +user_collection = db.get_collection("users") + + +def login_user(username, password): + query = {"username": {"$eq": username}} + users = list(user_collection.find(query)) + if len(users) > 0: + if password != str(users[0]['password']): + response = "Incorrect password" + else: + response = "Login Successfully" + return JSONResponse(content={"name": users[0]['username']}, status_code=200) + else: + response = "Incorrect username" + return JSONResponse(status_code=404, content={"message": response}) + + +def get_project_user(username): + query = {"username": {"$eq": username}} + users = list(user_collection.find(query)) + if len(users) > 0: + return JSONResponse(content={"name": users[0]['projectname'], "path":users[0]["projectpath"]}, status_code=200) + else: + response = "Incorrect username" + return JSONResponse(status_code=404, content={"message": response}) \ No newline at end of file diff --git a/checkpoints/ir/ir_2.bak b/checkpoints/ir/ir_2.bak new file mode 100644 index 0000000000000000000000000000000000000000..a909d805f89bd4e1eeb7b371398bbf144eb08751 --- /dev/null +++ b/checkpoints/ir/ir_2.bak @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8f407d2fea8879b0a6b4e9ebb0915c76fd6f747847e99341f44b1ad3a35fff +size 1620234049 diff --git a/checkpoints/ir/ir_2.ckpt b/checkpoints/ir/ir_2.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..2b9cb14ebaf5c55d9c73db847ee0e3d962a1520a --- /dev/null +++ b/checkpoints/ir/ir_2.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c8c5a824cc34e55908bba49bfd8d1ebce4a5190d6a652c6173e7f4db74ee45 +size 1620234113 diff --git a/checkpoints/ss/ss.ckpt b/checkpoints/ss/ss.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..e6c12cce7ba5598636cb9b4d572fd3087f023420 --- /dev/null +++ b/checkpoints/ss/ss.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f7dc2cf0b08e2ab572ac314ca7954e7fbbb4f36d38dd4f92992e117926fffc +size 532674815 diff --git a/checkpoints/sts/sts.pth b/checkpoints/sts/sts.pth new file mode 100644 index 
0000000000000000000000000000000000000000..b477956141f6791c73f53b61d2cf3e727a4381c6 --- /dev/null +++ b/checkpoints/sts/sts.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edabd4f7cb1c51323469474a3cb0477f54de69a3bcafc5a1d65922f27ce88bdd +size 1719715627 diff --git a/checkpoints/tracker/signboard_2793.pb b/checkpoints/tracker/signboard_2793.pb new file mode 100644 index 0000000000000000000000000000000000000000..385de1e093db24cc6a0168aa9b05a35ccba77c15 --- /dev/null +++ b/checkpoints/tracker/signboard_2793.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e7840b492bc134c1966bf855a963d9a66f8d2c398a576aeb351a2acce794820 +size 137072599 diff --git a/lightning_logs/version_0/events.out.tfevents.1701167735.fimo.4905.0 b/lightning_logs/version_0/events.out.tfevents.1701167735.fimo.4905.0 new file mode 100644 index 0000000000000000000000000000000000000000..e8a4ad9bf37566c488b8383cb187c9263bc6bc20 --- /dev/null +++ b/lightning_logs/version_0/events.out.tfevents.1701167735.fimo.4905.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008352a978f5f6d301473c1938d1c4688794aa1af7198463684d8230999ac195 +size 40 diff --git a/lightning_logs/version_0/hparams.yaml b/lightning_logs/version_0/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_0/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_1/events.out.tfevents.1701175893.fimo.59819.0 b/lightning_logs/version_1/events.out.tfevents.1701175893.fimo.59819.0 new file mode 100644 index 0000000000000000000000000000000000000000..d60cd108f016911db390207cf788f88b8580fd59 --- /dev/null +++ b/lightning_logs/version_1/events.out.tfevents.1701175893.fimo.59819.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cffe2c9cdd438c6a96c2975ec4cf22b888ca7b0a96ae1b375a5de13c2cc6db7c +size 40 diff --git a/lightning_logs/version_1/hparams.yaml b/lightning_logs/version_1/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_1/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_10/events.out.tfevents.1701180422.fimo.161354.0 b/lightning_logs/version_10/events.out.tfevents.1701180422.fimo.161354.0 new file mode 100644 index 0000000000000000000000000000000000000000..f111f7853e050eb83266735fe9aaa271a68ce9e0 --- /dev/null +++ b/lightning_logs/version_10/events.out.tfevents.1701180422.fimo.161354.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90395911e49f5b00053992675bfd1b9a58d8c78c39341a319eecc700b7f7a384 +size 40 diff --git a/lightning_logs/version_10/hparams.yaml b/lightning_logs/version_10/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_10/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_11/events.out.tfevents.1701180760.fimo.164324.0 b/lightning_logs/version_11/events.out.tfevents.1701180760.fimo.164324.0 new file mode 100644 index 0000000000000000000000000000000000000000..7144f9418a6343b078902d418af1a19f58a27ac0 --- /dev/null +++ b/lightning_logs/version_11/events.out.tfevents.1701180760.fimo.164324.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589c18612477a85917deba0409f676f0ee544c2b2b4f62ce31e6ca88cdc7b974 +size 40 diff --git 
a/lightning_logs/version_11/hparams.yaml b/lightning_logs/version_11/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_11/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_12/events.out.tfevents.1701181916.fimo.194872.0 b/lightning_logs/version_12/events.out.tfevents.1701181916.fimo.194872.0 new file mode 100644 index 0000000000000000000000000000000000000000..33defeaa11cd922044dfcc2a0038f7cf05867020 --- /dev/null +++ b/lightning_logs/version_12/events.out.tfevents.1701181916.fimo.194872.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baeb1457d4d912c3d663a1d676e8fce2eda5823b1ddad74eabf88d5b18db68f7 +size 40 diff --git a/lightning_logs/version_12/hparams.yaml b/lightning_logs/version_12/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_12/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_13/events.out.tfevents.1701183113.fimo.236121.0 b/lightning_logs/version_13/events.out.tfevents.1701183113.fimo.236121.0 new file mode 100644 index 0000000000000000000000000000000000000000..884e315b4d510b91f82816ac22ed0b5d72641a10 --- /dev/null +++ b/lightning_logs/version_13/events.out.tfevents.1701183113.fimo.236121.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0a5b3295a30eba374166dfbba6db41fbbb5a8b35a510db0188d0b1d65cc14a7 +size 40 diff --git a/lightning_logs/version_13/hparams.yaml b/lightning_logs/version_13/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_13/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_2/events.out.tfevents.1701176322.fimo.69714.0 b/lightning_logs/version_2/events.out.tfevents.1701176322.fimo.69714.0 new file mode 100644 index 0000000000000000000000000000000000000000..e889f91b84fc7b34a499d716ef2db891e0c74b3c --- /dev/null +++ b/lightning_logs/version_2/events.out.tfevents.1701176322.fimo.69714.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8231a9c3acce5144d11f98fa123674466cfb37d6045f7c72055bc6edad10ac23 +size 40 diff --git a/lightning_logs/version_2/hparams.yaml b/lightning_logs/version_2/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_2/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_3/events.out.tfevents.1701177354.fimo.86480.0 b/lightning_logs/version_3/events.out.tfevents.1701177354.fimo.86480.0 new file mode 100644 index 0000000000000000000000000000000000000000..49fa251dec384d8741d43be02be78006c6e2f62f --- /dev/null +++ b/lightning_logs/version_3/events.out.tfevents.1701177354.fimo.86480.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2acaaea1db482d062dfd5eb7c29c167cbec53299b0c077e29e2a985999d970cb +size 40 diff --git a/lightning_logs/version_3/hparams.yaml b/lightning_logs/version_3/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_3/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_4/events.out.tfevents.1701178112.fimo.105285.0 b/lightning_logs/version_4/events.out.tfevents.1701178112.fimo.105285.0 new file 
mode 100644 index 0000000000000000000000000000000000000000..ce82dbb9940c9cce8028511948d3a585f0937b52 --- /dev/null +++ b/lightning_logs/version_4/events.out.tfevents.1701178112.fimo.105285.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd3470ad3e909b274d20f877ddd5400eccca2d46f2f03c2e8ed42a8bed08bd76 +size 40 diff --git a/lightning_logs/version_4/hparams.yaml b/lightning_logs/version_4/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_4/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_5/events.out.tfevents.1701178247.fimo.107694.0 b/lightning_logs/version_5/events.out.tfevents.1701178247.fimo.107694.0 new file mode 100644 index 0000000000000000000000000000000000000000..955b5539dda51d39093e16a5e60c4d586fb1c3f6 --- /dev/null +++ b/lightning_logs/version_5/events.out.tfevents.1701178247.fimo.107694.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3151878797a860cbfff02f7d59a63ffdd5b4270861dee69dad9019f2921d8bf +size 40 diff --git a/lightning_logs/version_5/hparams.yaml b/lightning_logs/version_5/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_5/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_6/events.out.tfevents.1701178431.fimo.110736.0 b/lightning_logs/version_6/events.out.tfevents.1701178431.fimo.110736.0 new file mode 100644 index 0000000000000000000000000000000000000000..77326ebea31f91cef3acb80b04489da0be608e47 --- /dev/null +++ b/lightning_logs/version_6/events.out.tfevents.1701178431.fimo.110736.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaf92f8e5af54534fe53e2109699dd0155ddd9a62f4f5b3bcfed601402eb0e7b +size 40 diff --git a/lightning_logs/version_6/hparams.yaml b/lightning_logs/version_6/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_6/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_7/events.out.tfevents.1701179227.fimo.128943.0 b/lightning_logs/version_7/events.out.tfevents.1701179227.fimo.128943.0 new file mode 100644 index 0000000000000000000000000000000000000000..7aac5cf7db13b2cb857ec7c2c1ff0f7fac8e3acc --- /dev/null +++ b/lightning_logs/version_7/events.out.tfevents.1701179227.fimo.128943.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81bc5833a3bfac1698522f0426e979452278bc15a53ac6e27e538247797d148 +size 40 diff --git a/lightning_logs/version_7/hparams.yaml b/lightning_logs/version_7/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_7/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_8/events.out.tfevents.1701179756.fimo.145236.0 b/lightning_logs/version_8/events.out.tfevents.1701179756.fimo.145236.0 new file mode 100644 index 0000000000000000000000000000000000000000..90a86f5731e77fd34299c26c141c7f30c791cc35 --- /dev/null +++ b/lightning_logs/version_8/events.out.tfevents.1701179756.fimo.145236.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e8b13166405f376eeb7f4d740682b2340c021a08effab75771ea4aba9f27ad0 +size 40 diff --git a/lightning_logs/version_8/hparams.yaml b/lightning_logs/version_8/hparams.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_8/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/lightning_logs/version_9/events.out.tfevents.1701180093.fimo.150119.0 b/lightning_logs/version_9/events.out.tfevents.1701180093.fimo.150119.0 new file mode 100644 index 0000000000000000000000000000000000000000..79443ce7df7c16e41491ba627b2166074d5a18f7 --- /dev/null +++ b/lightning_logs/version_9/events.out.tfevents.1701180093.fimo.150119.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a6fe1ab4d795ebe9245b5f06e065580edc78362eb74aa9751302162656f8e1 +size 40 diff --git a/lightning_logs/version_9/hparams.yaml b/lightning_logs/version_9/hparams.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/lightning_logs/version_9/hparams.yaml @@ -0,0 +1 @@ +{} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c5692f0f57933d1f6370abc17267efb21bbd3e1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,281 @@ +absl-py==0.15.0 +aiofiles==23.2.1 +aiohttp==3.8.5 +aiosignal==1.3.1 +aiosmtplib==1.1.7 +alabaster==0.7.13 +altair==5.1.1 +annotated-types==0.5.0 +antlr4-python3-runtime==4.9.3 +anyio==3.6.2 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.2.3 +astroid==2.12.12 +astunparse==1.6.3 +async-lru==2.0.4 +async-timeout==4.0.3 +attrs==23.1.0 +Babel==2.12.1 +backcall==0.2.0 +beanie==1.15.3 +beautifulsoup4==4.12.2 +black==23.9.1 +bleach==6.0.0 +blinker==1.5 +cachetools==5.3.1 +certifi==2023.7.22 +cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1636046063618/work +charset-normalizer==3.2.0 +click==8.1.3 +cloudpickle==2.2.1 +cmake==3.27.2 +colorlog==6.7.0 +comm==0.1.4 +cycler==0.11.0 +Cython==3.0.2 +debugpy==1.6.7.post1 +decorator==5.1.1 +defusedxml==0.7.1 +-e git+https://github.com/PIVASIA/poi_engineering_api.git@bb38c251f6da51f8db8aab799e02ac995733606c#egg=detectron2&subdirectory=src/sts +dill==0.3.6 +dnspython==2.2.1 +docutils==0.20.1 +easydict==1.10 +ecdsa==0.18.0 +editdistance==0.6.0 +email-validator==1.3.0 +entrypoints==0.4 +exceptiongroup==1.1.3 +fastapi==0.88.0 +fastjsonschema==2.18.0 +ffmpeg==1.4 +ffmpeg-python==0.2.0 +ffmpy==0.3.1 +filelock==3.12.3 +Flask==2.2.2 +flatbuffers==1.12 +fonttools==4.42.1 +fqdn==1.5.1 +frozenlist==1.4.0 +fsspec==2023.6.0 +future @ file:///home/conda/feedstock_root/build_artifacts/future_1666786049710/work +fvcore==0.1.5.post20221221 +gast==0.3.3 +google-auth==2.22.0 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +gradio==3.50.2 +gradio_client==0.6.1 +grpcio==1.32.0 +h11==0.14.0 +h5py==2.10.0 +httpcore==0.18.0 +httpx==0.25.0 +huggingface-hub==0.17.2 +hydra-colorlog==1.2.0 +hydra-core==1.3.2 +idna==3.4 +imageio==2.31.2 +imagesize==1.4.1 +imgaug==0.4.0 +importlib-metadata==6.8.0 +importlib-resources==6.0.1 +iopath==0.1.7 +ipykernel==6.25.1 +ipyparallel==8.6.1 +ipython==7.34.0 +ipython-genutils==0.2.0 +ipywidgets==8.1.0 +isoduration==20.11.0 +isort==5.10.1 +itsdangerous==2.1.2 +jarowinkler==1.2.3 +jedi==0.19.0 +Jinja2==3.1.2 +joblib==1.3.2 +jose==1.0.0 +json5==0.9.14 +jsonpointer==2.4 +jsonschema==4.19.0 +jsonschema-specifications==2023.7.1 +jupyter-events==0.7.0 +jupyter-lsp==2.2.0 +jupyter_client==8.3.0 +jupyter_core==5.3.1 +jupyter_server==2.7.2 +jupyter_server_terminals==0.4.4 +jupyterlab==4.0.5 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.8 +jupyterlab_server==2.24.0 +Keras==2.4.0 
+Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.2 +kiwisolver==1.4.5 +lazy-model==0.2.0 +lazy-object-proxy==1.8.0 +libclang==16.0.6 +lightning-utilities==0.9.0 +lit==16.0.6 +lxml==4.9.3 +Markdown==3.4.4 +markdown-it-py==3.0.0 +MarkupSafe==2.1.1 +matplotlib==3.5.3 +matplotlib-inline==0.1.6 +mccabe==0.7.0 +mdurl==0.1.2 +mistune==3.0.1 +motor==3.1.1 +mpmath==1.3.0 +multidict==6.0.2 +MyApplication==0.1.0 +mypy-extensions==1.0.0 +nbclient==0.8.0 +nbconvert==7.7.4 +nbformat==5.9.2 +nest-asyncio==1.5.7 +networkx==3.1 +nose==1.3.7 +notebook==7.0.2 +notebook_shim==0.2.3 +numpy==1.20.3 +nvidia-cublas-cu11==11.10.3.66 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +nvidia-cuda-runtime-cu11==11.7.99 +nvidia-cudnn-cu11==8.5.0.96 +nvidia-cufft-cu11==10.9.0.58 +nvidia-curand-cu11==10.2.10.91 +nvidia-cusolver-cu11==11.4.0.1 +nvidia-cusparse-cu11==11.7.4.91 +nvidia-nccl-cu11==2.14.3 +nvidia-nvtx-cu11==11.7.91 +oauthlib==3.2.2 +olefile @ file:///home/conda/feedstock_root/build_artifacts/olefile_1602866521163/work +omegaconf==2.3.0 +opencv-python==4.8.0.76 +opt-einsum==3.3.0 +orjson==3.9.7 +overrides==7.4.0 +packaging==23.1 +pandas==2.0.3 +pandocfilters==1.5.0 +parso==0.8.3 +passlib==1.7.4 +pathspec==0.11.2 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1630696607296/work +pkgutil_resolve_name==1.3.10 +platformdirs==2.5.4 +Polygon3==3.0.9.1 +portalocker==2.8.2 +prometheus-client==0.17.1 +prompt-toolkit==3.0.39 +protobuf==3.19.6 +psutil==5.9.5 +ptyprocess==0.7.0 +pyasn1==0.4.8 +pyasn1-modules==0.3.0 +pycocotools==2.0.7 +pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work +pydantic==1.10.2 +pydantic_core==2.6.3 +pyDeprecate==0.3.2 +pydot==1.4.2 +pydub==0.25.1 +Pygments==2.16.1 +pylint==2.15.5 +pymongo==4.3.2 +pyparsing==3.1.1 +python-dateutil==2.8.2 +python-decouple==3.6 +python-dotenv==1.0.0 +python-jose==3.3.0 +python-json-logger==2.0.7 +python-multipart==0.0.5 +pytorch-lightning==1.9.5 +pytz==2023.3 +PyWavelets==1.4.1 +PyYAML==6.0.1 +pyzmq==25.1.1 +qtconsole==5.4.3 +QtPy==2.3.1 +rapidfuzz==2.4.2 +referencing==0.30.2 +regex==2023.10.3 +requests==2.31.0 +requests-oauthlib==1.3.1 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rich==13.6.0 +rpds-py==0.10.0 +rsa==4.9 +safetensors==0.4.0 +scikit-image==0.19.3 +scikit-learn==1.3.1 +scipy==1.9.0 +semantic-version==2.10.0 +Send2Trash==1.8.2 +Shapely==1.8.2 +six==1.15.0 +sniffio==1.3.0 +snowballstemmer==2.2.0 +soupsieve==2.4.1 +Sphinx==7.1.2 +sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.5 +starlette==0.22.0 +sympy==1.12 +tabulate==0.9.0 +tensorboard==2.9.1 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.4.3 +tensorflow-estimator==2.4.0 +tensorflow-gpu==2.3.0 +tensorflow-io-gcs-filesystem==0.34.0 +termcolor==1.1.0 +terminado==0.17.1 +testpath==0.6.0 +threadpoolctl==3.2.0 +tifffile==2023.7.10 +timm==0.6.7 +tinycss2==1.2.1 +tokenizers==0.12.1 +toml==0.10.2 +tomli==2.0.1 +tomlkit==0.11.6 +toolz==0.12.0 +torch==1.10.1+cu111 +torchaudio==0.10.1+cu111 +torchmetrics==0.11.4 +torchvision==0.11.2+cu111 +tornado==6.3.3 +tqdm==4.66.1 +traitlets==5.9.0 +transformers==4.21.1 +triton==2.0.0 +typing_extensions==4.7.1 +tzdata==2023.3 +uri-template==1.3.0 +urllib3==1.26.16 +uvicorn==0.20.0 +vncorenlp==1.0.3 +wcwidth==0.2.6 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.6.2 
+websockets==11.0.3 +Werkzeug==2.2.2 +widgetsnbextension==4.0.8 +wrapt==1.12.1 +yacs==0.1.8 +yarl==1.8.1 +zipp==3.16.2 diff --git a/requirements/ir.txt b/requirements/ir.txt new file mode 100644 index 0000000000000000000000000000000000000000..79d50d8017a8d957797044862187651d2146322b --- /dev/null +++ b/requirements/ir.txt @@ -0,0 +1,33 @@ +# --------- hydra --------- # +hydra-core==1.2.0 +hydra-colorlog==1.2.0 +hydra-optuna-sweeper==1.2.0 + +# --------- loggers --------- # +# wandb +# neptune-client +# mlflow +# comet-ml +# tensorboard + +# --------- linters --------- # +pre-commit # hooks for applying linters on commit +black # code formatting +isort # import sorting +flake8 # code analysis +nbstripout # remove output from jupyter notebooks + +# --------- others --------- # +python-dotenv # loading env variables from .env file +rich # beautiful text formatting in terminal +pytest # tests +sh # for running bash commands in some tests +pudb # debugger + +jsonschema +matplotlib +pandas +seaborn +sklearn +transformers +vncorenlp diff --git a/requirements/requirements.txt b/requirements/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5700823217bdd969cfe38345860bdaf72f23633d --- /dev/null +++ b/requirements/requirements.txt @@ -0,0 +1,157 @@ +absl-py==1.2.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +alembic==1.8.1 +aniso8601==9.0.1 +antlr4-python3-runtime==4.9.3 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==22.1.0 +autopage==0.5.1 +black==22.6.0 +cachetools==5.2.0 +cfgv==3.3.1 +charset-normalizer==2.1.0 +click==8.1.3 +cliff==3.10.1 +cloudpickle==2.1.0 +cmaes==0.8.2 +cmd2==2.4.2 +colorlog==6.6.0 +commonmark==0.9.1 +cycler==0.11.0 +distlib==0.3.5 +editdistance==0.6.0 +fastjsonschema==2.16.1 +filelock==3.7.1 +flake8==5.0.4 +Flask==2.2.1 +Flask-RESTful==0.3.9 +flatbuffers==1.12 +fonttools==4.34.4 +frozenlist==1.3.1 +fsspec==2022.7.1 +future==0.18.2 +fvcore==0.1.5.post20220512 +gast==0.4.0 +google-auth==2.9.1 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +greenlet==1.1.2 +grpcio==1.47.0 +h5py==3.7.0 +huggingface-hub==0.8.1 +hydra-colorlog==1.2.0 +hydra-core==1.2.0 +hydra-optuna-sweeper==1.2.0 +identify==2.5.3 +idna==3.3 +importlib-metadata==4.12.0 +importlib-resources==5.9.0 +iniconfig==1.1.1 +iopath==0.1.7 +isort==5.10.1 +itsdangerous==2.1.2 +jarowinkler==1.2.0 +jedi==0.18.1 +Jinja2==3.1.2 +joblib==1.1.0 +jsonschema==4.9.1 +jupyter-core==4.11.1 +keras==2.9.0 +Keras-Preprocessing==1.1.2 +kiwisolver==1.4.4 +libclang==14.0.6 +Mako==1.2.1 +Markdown==3.4.1 +MarkupSafe==2.1.1 +matplotlib==3.5.2 +mccabe==0.7.0 +multidict==6.0.2 +mypy-extensions==0.4.3 +nbformat==5.4.0 +nbstripout==0.6.0 +nodeenv==1.7.0 +numpy==1.23.1 +oauthlib==3.2.0 +omegaconf==2.2.2 +opencv-python==4.6.0.66 +opt-einsum==3.3.0 +optuna==2.10.1 +packaging==21.3 +pandas==1.4.3 +parso==0.8.3 +pathspec==0.9.0 +pbr==5.9.0 +Pillow==9.2.0 +pkgutil_resolve_name==1.3.10 +platformdirs==2.5.2 +pluggy==1.0.0 +Polygon3==3.0.9.1 +portalocker==2.5.1 +pre-commit==2.20.0 +prettytable==3.3.0 +protobuf==3.19.4 +pudb==2022.1.2 +py==1.11.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycocotools==2.0.4 +pycodestyle==2.9.1 +pyDeprecate==0.3.2 +pydot==1.4.2 +pyflakes==2.5.0 +Pygments==2.12.0 +pyparsing==3.0.9 +pyperclip==1.8.2 +pyrsistent==0.18.1 +pytest==7.1.2 +python-dateutil==2.8.2 +python-dotenv==0.20.0 +pytorch-lightning==1.6.5 +pytz==2022.1 +PyYAML==6.0 +rapidfuzz==2.4.2 +regex==2022.7.25 +requests==2.28.1 +requests-oauthlib==1.3.1 +rich==12.5.1 +rsa==4.9 +scikit-learn==1.1.1 +scipy==1.9.0 +seaborn==0.11.2 +sh==1.14.3 
+Shapely==1.8.2 +six==1.16.0 +sklearn==0.0 +SQLAlchemy==1.4.39 +stevedore==4.0.0 +tabulate==0.8.10 +tensorboard==2.9.1 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.9.1 +tensorflow-estimator==2.9.0 +tensorflow-io-gcs-filesystem==0.26.0 +termcolor==1.1.0 +threadpoolctl==3.1.0 +timm==0.6.7 +tokenizers==0.12.1 +toml==0.10.2 +tomli==2.0.1 +torchmetrics==0.9.3 +tqdm==4.64.0 +traitlets==5.3.0 +transformers==4.21.0 +typing_extensions==4.3.0 +urllib3==1.26.11 +urwid==2.1.2 +urwid-readline==0.13 +virtualenv==20.16.2 +vncorenlp==1.0.3 +wcwidth==0.2.5 +Werkzeug==2.2.1 +wrapt==1.14.1 +yacs==0.1.8 +yarl==1.8.1 +zipp==3.8.1 \ No newline at end of file diff --git a/requirements/ss.txt b/requirements/ss.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4e3a0f41a4713b8ee5725f2f27059f2997893e7 --- /dev/null +++ b/requirements/ss.txt @@ -0,0 +1,2 @@ +pytorch-lightning==1.6.5 +opencv-python \ No newline at end of file diff --git a/requirements/sts.txt b/requirements/sts.txt new file mode 100644 index 0000000000000000000000000000000000000000..e1104a27aff2125e44c80c3442eea8d1835c68bc --- /dev/null +++ b/requirements/sts.txt @@ -0,0 +1,6 @@ +scipy==1.9.0 +shapely==1.8.2 +rapidfuzz==2.4.2 +timm==0.6.7 +Polygon3==3.0.9.1 +editdistance==0.6.0 \ No newline at end of file diff --git a/src/ir/.env.example b/src/ir/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..15b883ca47fae4363526392aab805d0bc560923b --- /dev/null +++ b/src/ir/.env.example @@ -0,0 +1,7 @@ +# this is example of the file that can be used for storing private and user specific environment variables, like keys or system paths +# create a file named .env (by default .env will be excluded from version control) +# the variables declared in .env are loaded in train.py automatically +# hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR} + +MY_VAR="/home/user/my/system/path" +MY_KEY="asdgjhawi8y23ihsghsueity23ihwd" \ No newline at end of file diff --git a/src/ir/.pre-commit-config.yaml b/src/ir/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f415b330b4c77603422b64b90f190525a67815e --- /dev/null +++ b/src/ir/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + # list of supported hooks: https://pre-commit.com/hooks.html + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: debug-statements + - id: detect-private-key + + # python code formatting + - repo: https://github.com/psf/black + rev: 22.1.0 + hooks: + - id: black + args: [--line-length, "99"] + + # python import sorting + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--profile", "black", "--filter-files"] + + # python docstring formatting + - repo: https://github.com/myint/docformatter + rev: v1.4 + hooks: + - id: docformatter + args: [--in-place, --wrap-summaries, "99", --wrap-descriptions, "92"] + + # yaml formatting + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.5.1 + hooks: + - id: prettier + types: [yaml] + + # python code analysis + - repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + + # jupyter notebook cell output clearing + - repo: https://github.com/kynan/nbstripout + rev: 0.5.0 + hooks: + - id: nbstripout \ No newline at end of file diff --git a/src/ir/README.md b/src/ir/README.md new file mode 
100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/__pycache__/ir.cpython-38.pyc b/src/ir/__pycache__/ir.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0985329c964e0fc05608cd4d14770f4faeeceeb7 Binary files /dev/null and b/src/ir/__pycache__/ir.cpython-38.pyc differ diff --git a/src/ir/configs/callbacks/default.yaml b/src/ir/configs/callbacks/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd91ffa25487d545a624ae92179128df4412de1d --- /dev/null +++ b/src/ir/configs/callbacks/default.yaml @@ -0,0 +1,24 @@ +model_checkpoint: + _target_: pytorch_lightning.callbacks.ModelCheckpoint + monitor: "val_loss" # name of the logged metric which determines when model is improving + mode: "min" # "max" means higher metric value is better, can be also "min" + save_top_k: 3 # save k best models (determined by above metric) + save_last: True # additionaly always save model from last epoch + verbose: False + dirpath: "checkpoints/" + filename: "epoch_{epoch:03d}" + auto_insert_metric_name: False + +early_stopping: + _target_: pytorch_lightning.callbacks.EarlyStopping + monitor: "val_loss" # name of the logged metric which determines when model is improving + mode: "min" # "max" means higher metric value is better, can be also "min" + patience: 3 # how many validation epochs of not improving until training stops + min_delta: 0.001 # minimum change in the monitored metric needed to qualify as an improvement + +model_summary: + _target_: pytorch_lightning.callbacks.RichModelSummary + max_depth: -1 + +rich_progress_bar: + _target_: pytorch_lightning.callbacks.RichProgressBar \ No newline at end of file diff --git a/src/ir/configs/datamodule/tc.yaml b/src/ir/configs/datamodule/tc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56a73d4f5359154adabd6900650a2ca10ed838fc --- /dev/null +++ b/src/ir/configs/datamodule/tc.yaml @@ -0,0 +1,9 @@ +_target_: src.datamodules.tc_datamodule.TCDataModule + +data_dir: ${data_dir} # data_dir is specified in config.yaml +batch_size: 64 +train_test_split: 0.1 +num_workers: 4 +pin_memory: False +tokenizer: "vinai/phobert-base" +seed: ${seed} diff --git a/src/ir/configs/debug/default.yaml b/src/ir/configs/debug/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dfb104efb390a27e06de727c37578a709bf7bfd --- /dev/null +++ b/src/ir/configs/debug/default.yaml @@ -0,0 +1,28 @@ +# @package _global_ + +# default debugging setup, runs 1 full epoch +# other debugging configs can inherit from this one + +defaults: + - override /log_dir: debug.yaml + +trainer: + max_epochs: 1 + gpus: 0 # debuggers don't like gpus + detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor + track_grad_norm: 2 # track gradient norm with loggers + +datamodule: + num_workers: 0 # debuggers don't like multiprocessing + pin_memory: False # disable gpu memory pin + +# sets level of all command line loggers to 'DEBUG' +# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/ +hydra: + verbose: True + + # use this to set level of only chosen command line loggers to 'DEBUG': + # verbose: [src.train, src.utils] + +# config is already printed by hydra when `hydra/verbose: True` +print_config: False diff --git a/src/ir/configs/debug/limit_batches.yaml b/src/ir/configs/debug/limit_batches.yaml new file mode 100644 index 0000000000000000000000000000000000000000..feeb82df82b057a412fa7ae5a03d626b35eb02ae --- 
/dev/null +++ b/src/ir/configs/debug/limit_batches.yaml @@ -0,0 +1,12 @@ +# @package _global_ + +# uses only 1% of the training data and 5% of validation/test data + +defaults: + - default.yaml + +trainer: + max_epochs: 3 + limit_train_batches: 0.01 + limit_val_batches: 0.05 + limit_test_batches: 0.05 \ No newline at end of file diff --git a/src/ir/configs/debug/overfit.yaml b/src/ir/configs/debug/overfit.yaml new file mode 100644 index 0000000000000000000000000000000000000000..26fe2acfa79963e46e84bbefdba44d0803016d3b --- /dev/null +++ b/src/ir/configs/debug/overfit.yaml @@ -0,0 +1,10 @@ +# @package _global_ + +# overfits to 3 batches + +defaults: + - default.yaml + +trainer: + max_epochs: 20 + overfit_batches: 3 \ No newline at end of file diff --git a/src/ir/configs/debug/profiler.yaml b/src/ir/configs/debug/profiler.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a76391c35f1394e1c47c7ed99cad7d20974e5f33 --- /dev/null +++ b/src/ir/configs/debug/profiler.yaml @@ -0,0 +1,12 @@ +# @package _global_ + +# runs with execution time profiling + +defaults: + - default.yaml + +trainer: + max_epochs: 1 + profiler: "simple" + # profiler: "advanced" + # profiler: "pytorch" \ No newline at end of file diff --git a/src/ir/configs/debug/step.yaml b/src/ir/configs/debug/step.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fce8792c733331530ea0bdb6e80633c2f0522c9 --- /dev/null +++ b/src/ir/configs/debug/step.yaml @@ -0,0 +1,9 @@ +# @package _global_ + +# runs 1 train, 1 validation and 1 test step + +defaults: + - default.yaml + +trainer: + fast_dev_run: true \ No newline at end of file diff --git a/src/ir/configs/debug/test_only.yaml b/src/ir/configs/debug/test_only.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bbc6df535c232be6781f688fcd406cbd987d8ec9 --- /dev/null +++ b/src/ir/configs/debug/test_only.yaml @@ -0,0 +1,9 @@ +# @package _global_ + +# runs only test epoch + +defaults: + - default.yaml + +train: False +test: True \ No newline at end of file diff --git a/src/ir/configs/experiment/example.yaml b/src/ir/configs/experiment/example.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3af4bb41f3ea7a38e2edf45462f482fb28413bb0 --- /dev/null +++ b/src/ir/configs/experiment/example.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +# to execute this experiment run: +# python train.py experiment=example + +defaults: + - override /datamodule: mnist.yaml + - override /model: mnist.yaml + - override /callbacks: default.yaml + - override /logger: null + - override /trainer: default.yaml + +# all parameters below will be merged with parameters from default configurations set above +# this allows you to overwrite only specified parameters + +# name of the run determines folder name in logs +name: "simple_dense_net" + +seed: 12345 + +trainer: + min_epochs: 10 + max_epochs: 10 + gradient_clip_val: 0.5 + +model: + lr: 0.002 + net: + lin1_size: 128 + lin2_size: 256 + lin3_size: 64 + +datamodule: + batch_size: 64 + +logger: + wandb: + tags: ["mnist", "${name}"] \ No newline at end of file diff --git a/src/ir/configs/hparams_search/mnist_optuna.yaml b/src/ir/configs/hparams_search/mnist_optuna.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6eba299fbd5dc03136f820c14ecc01eef4d9186e --- /dev/null +++ b/src/ir/configs/hparams_search/mnist_optuna.yaml @@ -0,0 +1,60 @@ +# @package _global_ + +# example hyperparameter optimization of some experiment with Optuna: +# python train.py -m 
hparams_search=mnist_optuna experiment=example + +defaults: + - override /hydra/sweeper: optuna + +# choose metric which will be optimized by Optuna +# make sure this is the correct name of some metric logged in lightning module! +optimized_metric: "val/acc_best" + +# here we define Optuna hyperparameter search +# it optimizes for value returned from function with @hydra.main decorator +# docs: https://hydra.cc/docs/next/plugins/optuna_sweeper +hydra: + sweeper: + _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper + + # storage URL to persist optimization results + # for example, you can use SQLite if you set 'sqlite:///example.db' + storage: null + + # name of the study to persist optimization results + study_name: null + + # number of parallel workers + n_jobs: 1 + + # 'minimize' or 'maximize' the objective + direction: maximize + + # total number of runs that will be executed + n_trials: 25 + + # choose Optuna hyperparameter sampler + # docs: https://optuna.readthedocs.io/en/stable/reference/samplers.html + sampler: + _target_: optuna.samplers.TPESampler + seed: 12345 + n_startup_trials: 10 # number of random sampling runs before optimization starts + + # define range of hyperparameters + search_space: + datamodule.batch_size: + type: categorical + choices: [32, 64, 128] + model.lr: + type: float + low: 0.0001 + high: 0.2 + model.net.lin1_size: + type: categorical + choices: [32, 64, 128, 256, 512] + model.net.lin2_size: + type: categorical + choices: [32, 64, 128, 256, 512] + model.net.lin3_size: + type: categorical + choices: [32, 64, 128, 256, 512] \ No newline at end of file diff --git a/src/ir/configs/local/.gitkeep b/src/ir/configs/local/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/configs/log_dir/debug.yaml b/src/ir/configs/log_dir/debug.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a47561b28aebc1d972a65538c791060a795292f2 --- /dev/null +++ b/src/ir/configs/log_dir/debug.yaml @@ -0,0 +1,8 @@ +# @package _global_ + +hydra: + run: + dir: logs/debugs/runs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + sweep: + dir: logs/debugs/multiruns/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + subdir: ${hydra.job.num} \ No newline at end of file diff --git a/src/ir/configs/log_dir/default.yaml b/src/ir/configs/log_dir/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00ec902ed89d0663dbfbf372d4ee11bbe53f9434 --- /dev/null +++ b/src/ir/configs/log_dir/default.yaml @@ -0,0 +1,8 @@ +# @package _global_ + +hydra: + run: + dir: logs/experiments/runs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + sweep: + dir: logs/experiments/multiruns/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + subdir: ${hydra.job.num} \ No newline at end of file diff --git a/src/ir/configs/log_dir/evaluation.yaml b/src/ir/configs/log_dir/evaluation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3425185e70d70851ea8f3039d5b46cf93a2f3ae --- /dev/null +++ b/src/ir/configs/log_dir/evaluation.yaml @@ -0,0 +1,8 @@ +# @package _global_ + +hydra: + run: + dir: logs/evaluations/runs/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + sweep: + dir: logs/evaluations/multiruns/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S} + subdir: ${hydra.job.num} \ No newline at end of file diff --git a/src/ir/configs/logger/comet.yaml b/src/ir/configs/logger/comet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab4193b92e454c7d90138cf1e36ae386408fa2dc --- 
/dev/null +++ b/src/ir/configs/logger/comet.yaml @@ -0,0 +1,7 @@ +# https://www.comet.ml + +comet: + _target_: pytorch_lightning.loggers.comet.CometLogger + api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable + project_name: "template-tests" + experiment_name: ${name} \ No newline at end of file diff --git a/src/ir/configs/logger/csv.yaml b/src/ir/configs/logger/csv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7d6936986a39d250750d3d80a795fe89e269a86 --- /dev/null +++ b/src/ir/configs/logger/csv.yaml @@ -0,0 +1,7 @@ +# csv logger built in lightning + +csv: + _target_: pytorch_lightning.loggers.csv_logs.CSVLogger + save_dir: "." + name: "csv/" + prefix: "" \ No newline at end of file diff --git a/src/ir/configs/logger/many_loggers.yaml b/src/ir/configs/logger/many_loggers.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d142df7eb44b59c969ba7485d1264685d8c8122c --- /dev/null +++ b/src/ir/configs/logger/many_loggers.yaml @@ -0,0 +1,9 @@ +# train with many loggers at once + +defaults: + # - comet.yaml + - csv.yaml + # - mlflow.yaml + # - neptune.yaml + - tensorboard.yaml + - wandb.yaml \ No newline at end of file diff --git a/src/ir/configs/logger/mlflow.yaml b/src/ir/configs/logger/mlflow.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49a47a78e00d21a0ca7ce2dda82108c04df18ec2 --- /dev/null +++ b/src/ir/configs/logger/mlflow.yaml @@ -0,0 +1,9 @@ +# https://mlflow.org + +mlflow: + _target_: pytorch_lightning.loggers.mlflow.MLFlowLogger + experiment_name: ${name} + tracking_uri: ${original_work_dir}/logs/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI + tags: null + prefix: "" + artifact_location: null \ No newline at end of file diff --git a/src/ir/configs/logger/neptune.yaml b/src/ir/configs/logger/neptune.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f141574f2be60180166ea7e8b9b856dc955c78fa --- /dev/null +++ b/src/ir/configs/logger/neptune.yaml @@ -0,0 +1,11 @@ +# https://neptune.ai + +neptune: + _target_: pytorch_lightning.loggers.neptune.NeptuneLogger + api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable + project_name: your_name/template-tests + close_after_fit: True + offline_mode: False + experiment_name: ${name} + experiment_id: null + prefix: "" \ No newline at end of file diff --git a/src/ir/configs/logger/tensorboard.yaml b/src/ir/configs/logger/tensorboard.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ca71f4533b47bc4ba9d0d98aad6da4455e08a03 --- /dev/null +++ b/src/ir/configs/logger/tensorboard.yaml @@ -0,0 +1,10 @@ +# https://www.tensorflow.org/tensorboard/ + +tensorboard: + _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger + save_dir: "tensorboard/" + name: null + version: ${name} + log_graph: False + default_hp_metric: True + prefix: "" \ No newline at end of file diff --git a/src/ir/configs/logger/wandb.yaml b/src/ir/configs/logger/wandb.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3036cea8869739d7a8eb8971bc49fc65289a6893 --- /dev/null +++ b/src/ir/configs/logger/wandb.yaml @@ -0,0 +1,15 @@ +# https://wandb.ai + +wandb: + _target_: pytorch_lightning.loggers.wandb.WandbLogger + project: "template-tests" + # name: ${name} + save_dir: "." + offline: False # set True to store all logs only locally + id: null # pass correct id to resume experiment! 
+ # entity: "" # set to name of your wandb team + log_model: False + prefix: "" + job_type: "train" + group: "" + tags: [] \ No newline at end of file diff --git a/src/ir/configs/model/tc.yaml b/src/ir/configs/model/tc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3765f54b9e5a3de6f8cbed3762dc5cd97ba5836 --- /dev/null +++ b/src/ir/configs/model/tc.yaml @@ -0,0 +1,17 @@ +train: + _target_: src.models.tc_classifier.TCClassifier + model_name: "vinai/phobert-base" + n_classes: 5 + lr: 1e-5 + scheduler_type: "constant_schedule_with_warmup" + max_steps: 10000 + weight_decay: 0.01 + classifier_dropout: 0.1 + mixout: False + freeze_encoder: False + +test: + _target_: src.models.tc_classification_task.TCClassificationTask + model: null + function_to_apply: "sigmoid" + return_all_scores: True diff --git a/src/ir/configs/test.yaml b/src/ir/configs/test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ab29fa2efe86360ed8282175398836b04a6d47d --- /dev/null +++ b/src/ir/configs/test.yaml @@ -0,0 +1,30 @@ +# @package _global_ + +# specify here default evaluation configuration +defaults: + - _self_ + - datamodule: tc.yaml # choose the datamodule for evaluation + - model: tc.yaml + - callbacks: default.yaml + - logger: tensorboard.yaml + - trainer: default.yaml + - log_dir: evaluation.yaml + + - experiment: null + + # enable color logging + - override hydra/hydra_logging: colorlog + - override hydra/job_logging: colorlog + +data_dir: None + +print_config: True + +ignore_warnings: True + +seed: 42 + +name: "default" + +# passing checkpoint path is necessary +ckpt_path: ./checkpoints/ir/ir_2.ckpt diff --git a/src/ir/configs/train.yaml b/src/ir/configs/train.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae988417ce9d7007a0ce50e23bf9ebb78c854468 --- /dev/null +++ b/src/ir/configs/train.yaml @@ -0,0 +1,56 @@ +# @package _global_ + +# specify here default training configuration +defaults: + - _self_ + - datamodule: tc.yaml + - model: tc.yaml + - callbacks: default.yaml + - logger: tensorboard.yaml # set logger here or use command line (e.g. `python train.py logger=tensorboard`) + - trainer: default.yaml + - log_dir: default.yaml + + # experiment configs allow for version control of specific configurations + # e.g. best hyperparameters for each combination of model and datamodule + - experiment: null + + # debugging config (enable through command line, e.g. 
`python train.py debug=default) + - debug: null + + # config for hyperparameter optimization + - hparams_search: null + + # optional local config for machine/user specific settings + # it's optional since it doesn't need to exist and is excluded from version control + - optional local: default.yaml + + # enable color logging + - override hydra/hydra_logging: colorlog + - override hydra/job_logging: colorlog + +# path to original working directory +# hydra hijacks working directory by changing it to the new log directory +# https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory +original_work_dir: ${hydra:runtime.cwd} + +# path to folder with data +data_dir: ${original_work_dir}/data +# pretty print config at the start of the run using Rich library +print_config: True + +# disable python warnings if they annoy you +ignore_warnings: True + +# set False to skip model training +train: True + +# evaluate on test set, using best model weights achieved during training +# lightning chooses best weights based on the metric specified in checkpoint callback +test: True + +# seed for random number generators in pytorch, numpy and python.random +seed: 42 + +# default name for the experiment, determines logging folder path +# (you can overwrite this name in experiment configs) +name: "default" \ No newline at end of file diff --git a/src/ir/configs/trainer/ddp.yaml b/src/ir/configs/trainer/ddp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..becd40f634e1f8edcd968aeca546c8eab8f1d9dd --- /dev/null +++ b/src/ir/configs/trainer/ddp.yaml @@ -0,0 +1,6 @@ +defaults: + - default.yaml + +gpus: 4 +strategy: ddp +sync_batchnorm: True \ No newline at end of file diff --git a/src/ir/configs/trainer/default.yaml b/src/ir/configs/trainer/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47bf4d2f1237cdde6a116594de95d7fbabe69682 --- /dev/null +++ b/src/ir/configs/trainer/default.yaml @@ -0,0 +1,12 @@ +_target_: pytorch_lightning.Trainer + +gpus: 1 + +min_epochs: 10 +max_epochs: 50 + +# number of validation steps to execute at the beginning of the training +# num_sanity_val_steps: 0 + +# ckpt path +resume_from_checkpoint: null diff --git a/src/ir/ir.py b/src/ir/ir.py new file mode 100644 index 0000000000000000000000000000000000000000..79b71a22f9dd226563005ee1c860b14f21c90885 --- /dev/null +++ b/src/ir/ir.py @@ -0,0 +1,25 @@ +import dotenv +from omegaconf import DictConfig, OmegaConf +from hydra import compose, initialize + + +# load environment variables from `.env` file if it exists +# recursively searches for `.env` in all folders starting from work dir +dotenv.load_dotenv(override=True) + +# @hydra.main(version_base = None, config_path="configs/", config_name="test.yaml") +def handle_ir(img_path, input, output): + with initialize(version_base=None, config_path="configs", job_name="ir"): + config = compose(config_name="test") + # print(OmegaConf.to_yaml(cfg)) + + # Imports can be nested inside @hydra.main to optimize tab completion + # https://github.com/facebookresearch/hydra/issues/934 + from src.ir.src import utils + from src.ir.src.testing_pipeline import test + + # Applies optional utilities + utils.extras(config) + + # Evaluate model + return test(config, input, output, img_path) diff --git a/src/ir/scipts/schedule.sh b/src/ir/scipts/schedule.sh new file mode 100644 index 0000000000000000000000000000000000000000..11414d974c793ac55fd5cbdd1c24c4b8e994b613 --- /dev/null +++ b/src/ir/scipts/schedule.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# 
Shedule execution of many runs +# Run from root folder with: bash scripts/schedule.sh + +python train.py trainer.max_epochs=5 + +python train.py trainer.max_epochs=10 logger=csv \ No newline at end of file diff --git a/src/ir/setup.cfg b/src/ir/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..ea95b8c23dee682a5cffedfbd35e3d126810ac39 --- /dev/null +++ b/src/ir/setup.cfg @@ -0,0 +1,36 @@ +[isort] +line_length = 99 +profile = black +filter_files = True + + +[flake8] +max_line_length = 99 +show_source = True +format = pylint +ignore = + F401 # Module imported but unused + W504 # Line break occurred after a binary operator + F841 # Local variable name is assigned to but never used + E501 # Line too long +exclude = + .git + __pycache__ + data/* + tests/* + notebooks/* + logs/* + + +[tool:pytest] +testpaths = tests/ +log_cli = True +markers = + slow +addopts = + --durations=0 + --strict-markers + --doctest-modules +filterwarnings = + ignore::DeprecationWarning + ignore::UserWarning \ No newline at end of file diff --git a/src/ir/src/__init__.py b/src/ir/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/src/__pycache__/__init__.cpython-38.pyc b/src/ir/src/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10505199c553d897db629132c97208ff7fc7e34d Binary files /dev/null and b/src/ir/src/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/ir/src/__pycache__/testing_pipeline.cpython-38.pyc b/src/ir/src/__pycache__/testing_pipeline.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..836a768476c899d6748700d79a696b0490ad3255 Binary files /dev/null and b/src/ir/src/__pycache__/testing_pipeline.cpython-38.pyc differ diff --git a/src/ir/src/datamodules/__init__.py b/src/ir/src/datamodules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/src/datamodules/__pycache__/__init__.cpython-38.pyc b/src/ir/src/datamodules/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea504babeb08b0ee2babc019467036ca5fc818d9 Binary files /dev/null and b/src/ir/src/datamodules/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/ir/src/datamodules/__pycache__/tc_datamodule.cpython-38.pyc b/src/ir/src/datamodules/__pycache__/tc_datamodule.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c93d672cea1016c827625a15acf97845238a1b7 Binary files /dev/null and b/src/ir/src/datamodules/__pycache__/tc_datamodule.cpython-38.pyc differ diff --git a/src/ir/src/datamodules/__pycache__/tc_dataset.cpython-38.pyc b/src/ir/src/datamodules/__pycache__/tc_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ff34b2ca4a9ef9ca896b5f9e1333482e4f4a06a Binary files /dev/null and b/src/ir/src/datamodules/__pycache__/tc_dataset.cpython-38.pyc differ diff --git a/src/ir/src/datamodules/__pycache__/tc_transform.cpython-38.pyc b/src/ir/src/datamodules/__pycache__/tc_transform.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f37100d4980055b175e4a32b996cdb0e1db9afb Binary files /dev/null and b/src/ir/src/datamodules/__pycache__/tc_transform.cpython-38.pyc differ diff --git a/src/ir/src/datamodules/components/__init__.py b/src/ir/src/datamodules/components/__init__.py new file mode 100644 
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/src/datamodules/tc_datamodule.py b/src/ir/src/datamodules/tc_datamodule.py new file mode 100644 index 0000000000000000000000000000000000000000..09cc456b3f00aca2627e639766ea54330e1ef54d --- /dev/null +++ b/src/ir/src/datamodules/tc_datamodule.py @@ -0,0 +1,96 @@ +from torch.utils.data import DataLoader + +import numpy as np +import pandas as pd + +import pytorch_lightning as pl +from torch.utils.data import DataLoader + +from src.ir.src.datamodules.tc_dataset import TCDataset +from src.ir.src.datamodules.tc_transform import MLB + +from src.ir.src.preprocessing.data_cleaning import batch_clean_text + +classes = {0: "name", 1: "address_key", 2: "address_value", 3: "number_contact_key", + 4: "number_contact_value", 5: "link_contact_key", 6: "link_contact_value", 7: "attribute"} + +class TCDataModule(pl.LightningDataModule): + def __init__(self, + tokenizer: str = "vinai/phobert-base", + data_dir: str = "data", + train_test_split: float = 0.1, + batch_size: int = 64, + num_workers: int = 4, + pin_memory: bool = False, + seed: int = 42): + super().__init__() + + self.save_hyperparameters(logger=False) + + self.mlb = MLB() + + self.prepare_data() + + @property + def num_classes(self) -> int: + return 5 + + def prepare_data(self): + # load data + if self.hparams.data_dir is not None: + data = self.hparams.data_dir + data_text = [] + for key, value in data.items(): + for text in value: + data_text.append(text) + data_text = batch_clean_text(data_text) + self.x_test = np.array(data_text) + self.y_test = None + print(len(self.x_test), self.x_test) + + def setup(self, stage="fit"): + if stage == "fit" or stage is None: + self.train_dataset = TCDataset(text=self.x_train, + labels=self.y_train, + tokenizer=self.hparams.tokenizer) + + self.val_dataset = TCDataset(text=self.x_val, + labels=self.y_val, + tokenizer=self.hparams.tokenizer) + + if stage == "predict" or stage == "test" or stage is None: + self.test_dataset = TCDataset(text=self.x_test, + labels=self.y_test, + tokenizer=self.hparams.tokenizer) + + def train_dataloader(self): + if self.train_dataset is not None: + return DataLoader(self.train_dataset, + batch_size=self.hparams.batch_size, + shuffle=True, + num_workers=self.hparams.num_workers, + pin_memory=self.hparams.pin_memory) + + def val_dataloader(self): + if self.val_dataset is not None: + return DataLoader(self.val_dataset, + batch_size=self.hparams.batch_size, + shuffle=False, + num_workers=self.hparams.num_workers, + pin_memory=self.hparams.pin_memory) + + def test_dataloader(self): + if self.test_dataset is not None: + return DataLoader(self.test_dataset, + batch_size=self.hparams.batch_size, + shuffle=False, + num_workers=self.hparams.num_workers, + pin_memory=self.hparams.pin_memory) + + def predict_dataloader(self): + if self.test_dataset is not None: + return DataLoader(self.test_dataset, + batch_size=self.hparams.batch_size, + shuffle=False, + num_workers=self.hparams.num_workers, + pin_memory=self.hparams.pin_memory) diff --git a/src/ir/src/datamodules/tc_dataset.py b/src/ir/src/datamodules/tc_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..19654940896540a7c556206a72ac0c77d29dbc0c --- /dev/null +++ b/src/ir/src/datamodules/tc_dataset.py @@ -0,0 +1,44 @@ +from torch.utils.data import Dataset + +import torch +from transformers import AutoTokenizer + + +class TCDataset(Dataset): + def __init__(self, text, labels=None, tokenizer=None): + 
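+ # `tokenizer` is a Hugging Face tokenizer name or path (configs/datamodule/tc.yaml passes "vinai/phobert-base").
+ # `labels` may be None at inference time; accordingly, the 'label' entry in __getitem__ below is commented out.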
self.tokenizer = AutoTokenizer.from_pretrained(tokenizer) + self.text = text + self.labels = labels + + def _tokenize_function(self, text): + return self.tokenizer( + text, + padding='max_length', + max_length=64, + return_token_type_ids=True, + return_attention_mask=True, # Differentiates padded vs normal token + truncation=True, # Truncate data beyond max length + return_tensors='pt' # PyTorch Tensor format + ) + + def __len__(self): + return len(self.text) + + def __getitem__(self, item_idx): + text = self.text[item_idx] + inputs = self._tokenize_function(text) + + input_ids = inputs['input_ids'].flatten() + attn_mask = inputs['attention_mask'].flatten() + token_type_ids = inputs['token_type_ids'].flatten() + # position_ids = inputs['position_ids'] + # head_mask = inputs['head_mask'] + + return { + 'input_ids': input_ids, + 'attention_mask': attn_mask, + 'token_type_ids': token_type_ids, + # 'position_ids': position_ids, + # 'head_mask': head_mask, + # 'label': torch.tensor(self.labels[item_idx], dtype=torch.float) + } diff --git a/src/ir/src/datamodules/tc_transform.py b/src/ir/src/datamodules/tc_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..f30ffae81b6c6f1decec73d5d4dfb4c0b291d8df --- /dev/null +++ b/src/ir/src/datamodules/tc_transform.py @@ -0,0 +1,22 @@ +from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer, LabelEncoder + + +class MLB(): + def __init__(self): + super().__init__() + + self.label = ['name', 'address', 'phone', 'link', 'attribute'] + # self.multiLabelBinarizer = LabelEncoder() + # self.multiLabelBinarizer.fit(self.label) + + self.multiLabelBinarizer = LabelBinarizer() + self.multiLabelBinarizer.fit(self.label) + + # self.multiLabelBinarizer = MultiLabelBinarizer() + # self.multiLabelBinarizer.fit([self.label]) + + def transform(self, y): + return self.multiLabelBinarizer.transform(y) + + def inverse_transform(self, y): + return self.multiLabelBinarizer.inverse_transform(y) diff --git a/src/ir/src/models/__init__.py b/src/ir/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/src/models/__pycache__/__init__.cpython-38.pyc b/src/ir/src/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d571d016b618e724c38bf001dd94233ea0f131a4 Binary files /dev/null and b/src/ir/src/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/ir/src/models/__pycache__/tc_classification_task.cpython-38.pyc b/src/ir/src/models/__pycache__/tc_classification_task.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73bc85059dbffcf38f01daafc7536406b25d4b8c Binary files /dev/null and b/src/ir/src/models/__pycache__/tc_classification_task.cpython-38.pyc differ diff --git a/src/ir/src/models/__pycache__/tc_classifier.cpython-38.pyc b/src/ir/src/models/__pycache__/tc_classifier.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40dd04c3cba0ccb0be689160c7da808eb7c52e3d Binary files /dev/null and b/src/ir/src/models/__pycache__/tc_classifier.cpython-38.pyc differ diff --git a/src/ir/src/models/components/__init__.py b/src/ir/src/models/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/src/models/components/__pycache__/__init__.cpython-38.pyc b/src/ir/src/models/components/__pycache__/__init__.cpython-38.pyc new file 
mode 100644 index 0000000000000000000000000000000000000000..a4b7e20adcac48b09b352eb52b5a8bfb3cb05319 Binary files /dev/null and b/src/ir/src/models/components/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/ir/src/models/components/__pycache__/mixlinear.cpython-38.pyc b/src/ir/src/models/components/__pycache__/mixlinear.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0525b46c10853b8c24f037785f8c05bb12d6d13 Binary files /dev/null and b/src/ir/src/models/components/__pycache__/mixlinear.cpython-38.pyc differ diff --git a/src/ir/src/models/components/__pycache__/mixout.cpython-38.pyc b/src/ir/src/models/components/__pycache__/mixout.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2d7db640bf0f9f9b5d1df4d70f4efc11414d3f7 Binary files /dev/null and b/src/ir/src/models/components/__pycache__/mixout.cpython-38.pyc differ diff --git a/src/ir/src/models/components/mixlinear.py b/src/ir/src/models/components/mixlinear.py new file mode 100644 index 0000000000000000000000000000000000000000..942aaec8b93c596cf4f20d6bf84a18787e3e3fde --- /dev/null +++ b/src/ir/src/models/components/mixlinear.py @@ -0,0 +1,51 @@ +##++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Cheolhyoung Lee +## Department of Mathematical Sciences, KAIST +## Email: cheolhyoung.lee@kaist.ac.kr +## Implementation of mixout from https://arxiv.org/abs/1909.11299 +## "Mixout: Effective Regularization to Finetune Large-scale Pretrained Language Models" +##++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +import math +import torch +import torch.nn.init as init +import torch.nn.functional as F + +from torch.nn import Parameter + +from src.ir.src.models.components.mixout import mixout + + +class MixLinear(torch.nn.Module): + __constants__ = ['bias', 'in_features', 'out_features'] + # If target is None, nn.Sequential(nn.Linear(m, n), MixLinear(m', n', p)) + # is equivalent to nn.Sequential(nn.Linear(m, n), nn.Dropout(p), nn.Linear(m', n')). 
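+ # In this repo, TCClassifier (src/ir/src/models/tc_classifier.py) instantiates the head as
+ # MixLinear(hidden_size, n_classes, True, None, classifier_dropout), i.e. target=None and
+ # p=classifier_dropout, so it acts as a dropout-regularized linear head rather than mixing
+ # toward pretrained weights.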
+ # If you want to change a dropout layer to a mixout layer, + # you should replace nn.Linear right after nn.Dropout(p) with Mixout(p) + def __init__(self, in_features, out_features, bias=True, target=None, p=0.0): + super(MixLinear, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = Parameter(torch.Tensor(out_features, in_features)) + if bias: + self.bias = Parameter(torch.Tensor(out_features)) + else: + self.register_parameter('bias', None) + self.reset_parameters() + self.target = target + self.p = p + + def reset_parameters(self): + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + + def forward(self, input): + return F.linear(input, mixout(self.weight, self.target, + self.p, self.training), self.bias) + + def extra_repr(self): + type = 'drop' if self.target is None else 'mix' + return '{}={}, in_features={}, out_features={}, bias={}'.format(type+"out", self.p, + self.in_features, self.out_features, self.bias is not None) \ No newline at end of file diff --git a/src/ir/src/models/components/mixout.py b/src/ir/src/models/components/mixout.py new file mode 100644 index 0000000000000000000000000000000000000000..ec9b5813147077f115bb9f5164634da5c60b8f76 --- /dev/null +++ b/src/ir/src/models/components/mixout.py @@ -0,0 +1,70 @@ +##++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Cheolhyoung Lee +## Department of Mathematical Sciences, KAIST +## Email: cheolhyoung.lee@kaist.ac.kr +## Implementation of mixout from https://arxiv.org/abs/1909.11299 +## "Mixout: Effective Regularization to Finetune Large-scale Pretrained Language Models" +##++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +from torch.autograd.function import InplaceFunction + + +class Mixout(InplaceFunction): + # target: a weight tensor mixes with a input tensor + # A forward method returns + # [(1 - Bernoulli(1 - p) mask) * target + (Bernoulli(1 - p) mask) * input - p * target]/(1 - p) + # where p is a mix probability of mixout. + # A backward returns the gradient of the forward method. + # Dropout is equivalent to the case of target=None. + # I modified the code of dropout in PyTorch. + @staticmethod + def _make_noise(input): + return input.new().resize_as_(input) + + @classmethod + def forward(cls, ctx, input, target=None, p=0.0, training=False, inplace=False): + if p < 0 or p > 1: + raise ValueError("A mix probability of mixout has to be between 0 and 1," + " but got {}".format(p)) + if target is not None and input.size() != target.size(): + raise ValueError("A target tensor size must match with a input tensor size {}," + " but got {}". 
format(input.size(), target.size())) + ctx.p = p + ctx.training = training + + if target is None: + target = cls._make_noise(input) + target.fill_(0) + target = target.to(input.device) + + if inplace: + ctx.mark_dirty(input) + output = input + else: + output = input.clone() + + if ctx.p == 0 or not ctx.training: + return output + + ctx.noise = cls._make_noise(input) + if len(ctx.noise.size()) == 1: + ctx.noise.bernoulli_(1 - ctx.p) + else: + ctx.noise[0].bernoulli_(1 - ctx.p) + ctx.noise = ctx.noise[0].repeat(input.size()[0], *([1] * (len(input.size())-1))) + ctx.noise.expand_as(input) + + if ctx.p == 1: + output = target.clone() + else: + output = ((1 - ctx.noise) * target + ctx.noise * output - ctx.p * target) / (1 - ctx.p) + return output + + @staticmethod + def backward(ctx, grad_output): + if ctx.p > 0 and ctx.training: + return grad_output * ctx.noise, None, None, None, None + else: + return grad_output, None, None, None, None + +def mixout(input, target=None, p=0.0, training=False, inplace=False): + return Mixout.apply(input, target, p, training, inplace) diff --git a/src/ir/src/models/tc_classification_task.py b/src/ir/src/models/tc_classification_task.py new file mode 100644 index 0000000000000000000000000000000000000000..7d2e4e3007834295e5966c105c2eeceb7a968738 --- /dev/null +++ b/src/ir/src/models/tc_classification_task.py @@ -0,0 +1,77 @@ +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union + +import torch +import pytorch_lightning as pl + +from transformers import PreTrainedModel, TFPreTrainedModel + + +class TCClassificationTask(pl.LightningModule): + def __init__(self, + model: Union["PreTrainedModel", "TFPreTrainedModel"], + function_to_apply: str = "None", + return_all_scores: bool = True, + ): + super().__init__() + + self.model = model + + if function_to_apply == "sigmoid": + self.function_to_apply = torch.sigmoid + elif function_to_apply == "softmax": + self.function_to_apply = torch.softmax + else: + self.function_to_apply = lambda x: x + + self.return_all_scores = return_all_scores + + def forward(self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None): + logits = self.model(input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds,) + + return logits + + def postprocess(self, logits, function_to_apply): + if function_to_apply is None: + function_to_apply = lambda x: x + + logits = function_to_apply(logits) + return logits + + def predict_step(self, batch, batch_idx): + logits = self(batch['input_ids'], + batch['attention_mask'], + batch['token_type_ids']) + + scores = self.postprocess(logits, self.function_to_apply) + if not self.return_all_scores: + scores = torch.argmax(scores, dim=1) + + if "label" in batch.keys(): + return scores.data.cpu().numpy(), batch["label"].data.cpu().numpy() + + return scores.data.cpu().numpy() + + def test_step(self, batch, batch_idx): + logits = self(batch['input_ids'], + batch['attention_mask'], + batch['token_type_ids']) + + scores = self.postprocess(logits, self.function_to_apply) + if not self.return_all_scores: + scores = torch.argmax(scores, dim=1) + + if "label" in batch.keys(): + return scores.data.cpu().numpy(), batch["label"].data.cpu().numpy() + + return scores.data.cpu().numpy() \ No newline at end of file diff --git a/src/ir/src/models/tc_classifier.py b/src/ir/src/models/tc_classifier.py new file mode 100644 index 
0000000000000000000000000000000000000000..196f6054b79822cc6ffed866827031cc4eae90bb --- /dev/null +++ b/src/ir/src/models/tc_classifier.py @@ -0,0 +1,139 @@ +import torch +from torch import nn +import pytorch_lightning as pl + +from transformers import (AutoModel, AdamW, + get_linear_schedule_with_warmup, + get_constant_schedule_with_warmup) + +from sklearn.metrics import average_precision_score + +from src.ir.src.models.components.mixlinear import MixLinear + + +def mean_pooling(output, attention_mask): + token_embeddings = output[0] #First element of output contains all token embeddings + input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) + +class TCClassifier(pl.LightningModule): + # Set up the classifier + def __init__(self, + model_name: str, + n_classes: int = 5, + lr: float = 1e-5, + scheduler_type: str = None, + max_steps: int = 10000, + weight_decay: float = 0.01, + classifier_dropout: float = 0.1, + mixout: bool = False, + freeze_encoder: bool = False): + super().__init__() + self.save_hyperparameters() + + # define Bert-based encoder + self.encoder = AutoModel.from_pretrained(model_name) + # freezing Bert + if freeze_encoder: + for param in self.encoder.parameters(): + param.requires_grad = False + + # Pooling on-top of word embedding + if model_name.startswith("sentence-transformers"): + # According to official guideline, we have to apply the + # pooling-operation on-top of the contextualized word embeddings. + # Details: https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 + self.pooling = mean_pooling + else: + # For vanilla BERT-based, simply return + # pooler_output from embeddings. 
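+ # (x is the encoder output and x[1] its pooler_output; mean_pooling above instead averages
+ # the token embeddings x[0] weighted by the attention mask.)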
+ self.pooling = lambda x, y: x[1] + + # Header for downstream task + if mixout: + # using Mixout + self.dropout = nn.Dropout(p=0) + self.roi_head = MixLinear(self.encoder.config.hidden_size, n_classes, True, None, classifier_dropout) + else: + self.dropout = nn.Dropout(p=classifier_dropout) + self.roi_head = nn.Linear(self.encoder.config.hidden_size, n_classes) + + self.lr = lr + self.scheduler_type = scheduler_type + self.max_steps = max_steps + self.weight_decay = weight_decay + self.criterion = nn.BCEWithLogitsLoss() + + def configure_optimizers(self): + optimizer = AdamW(self.parameters(), + lr=self.lr, + weight_decay=self.weight_decay) + if self.scheduler_type is None: + return optimizer + + warmup_steps = 0.1 * self.max_steps # default warmup step to 10% + + if self.scheduler_type == "constant_schedule_with_warmup": + scheduler = get_constant_schedule_with_warmup(optimizer, warmup_steps) + elif self.scheduler_type == "linear_schedule_with_warmup": + scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, self.max_steps) + else: + raise ValueError("Scheduler {0} is not supported".format(self.scheduler_type)) + + return { + "optimizer": optimizer, + "lr_scheduler": { + "scheduler": scheduler, + "interval": "step", + "frequency": 1, + } + } + + def forward(self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None): + + outputs = self.encoder(input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds,) + + pooled_output = self.pooling(outputs, attention_mask) + pooled_output = self.dropout(pooled_output) + logits = self.roi_head(pooled_output) + return logits + + def training_step(self, batch, batch_idx): + logits = self(batch['input_ids'], + batch['attention_mask'], + batch['token_type_ids']) + + labels = batch['label'] + loss = self.criterion(logits, labels) + self.log('train_loss', loss, prog_bar=True, logger=True) + + return loss + + def validation_step(self, batch, batch_idx): + logits = self(batch['input_ids'], + batch['attention_mask'], + batch['token_type_ids']) + labels = batch['label'] + + loss = self.criterion(logits, labels) + self.log('val_loss', loss , prog_bar=True, logger=True) + + # evaluate accuracy + logits = torch.sigmoid(logits) + logits = logits.data.cpu().numpy() + + labels = labels.data.cpu().numpy() + + mAP = average_precision_score(labels.flatten(), logits.flatten()) + self.log('val_mAP', mAP , prog_bar=True, logger=True) \ No newline at end of file diff --git a/src/ir/src/preprocessing/README.md b/src/ir/src/preprocessing/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e18b06a38b2f80dd1592d84b3ddadc88c6da263b --- /dev/null +++ b/src/ir/src/preprocessing/README.md @@ -0,0 +1,64 @@ +# Data pipeline + +# Language Model + +## Data Preparation + +For Facebook post: + +``` +python prepare_mlm_data.py -o mlm_posts.txt -C posts -FT content -FI postID --mention rm --emoji rm --url rm --remove-repetition-dash --sentence-seg --min-text-length 10 +``` + +This code snipet will remove mention, remove emoji, remove url, remove repetition chars. Parameters `--sentence-seg` and `--min-text-length 10` are used while creating dataset for learning language model. Otherwise, do not use it. 
+ +For Facebook comments: + +``` +python prepare_mlm_data.py -o mlm_comments.txt -C comments -FT contentHTML -FI commentID --mention rm --emoji cv --url rm --remove-repetition-dash --sentence-seg --min-text-length 10 +``` + +This code snipet will remove mention, translate emoji to Vietnamese text, remove url, remove repetition chars. Parameters `--sentence-seg` and `--min-text-length 10` are used while creating dataset for learning language model. Otherwise, do not use it. + +# Multi-label Post Classification + +## Training Data Preparation + +Prepare training data: + +``` +python prepare_mlc_data.py -i [PATH_TO_ANNOTATED_JSON] -o [PATH_TO_TRAINING_JSON] --mention rm --emoji rm --url rm --remove-repetition-dash --min-text-length 10 +``` + +## Inferencing + +For inferencing, call `batch_clean` (for list of sequence) or `clean` (for single sequence) with following parameters: + +``` +mention='rm' +url='rm' +emoji='rm' +remove_repetition_char=False +remove_repetition_dash=True +sentence_seg=False +``` + +# Pipeline + +## Downloading data + +For latest posts and comments: + +``` +python mongo_helper.py -o [OUTPUT_DIR] -C posts +python mongo_helper.py -o [OUTPUT_DIR] -C comments +``` + +## Prepare data for inferencing + + +``` +python pipeline_prepare_data.py -i [PATH_TO_DOWNLOADED_DATA] -o [OUTPUT_DIR] -t [TASK_NAME] +``` + +where `TASK_NAME` may be either `post_cls`, `post_ner`, and `comments_cls`. \ No newline at end of file diff --git a/src/ir/src/preprocessing/__pycache__/data_cleaning.cpython-38.pyc b/src/ir/src/preprocessing/__pycache__/data_cleaning.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7e96a4f553123ea1911b821d83c6fd6901512b9 Binary files /dev/null and b/src/ir/src/preprocessing/__pycache__/data_cleaning.cpython-38.pyc differ diff --git a/src/ir/src/preprocessing/cat_mapping.py b/src/ir/src/preprocessing/cat_mapping.py new file mode 100644 index 0000000000000000000000000000000000000000..f0d0923f20d41fff2cb3f6fa9731452099bcfcfd --- /dev/null +++ b/src/ir/src/preprocessing/cat_mapping.py @@ -0,0 +1,65 @@ +import sys + +from numpy.core.numeric import NaN +sys.path.insert(0, '../Text-based-PoIs-Classification') + +from configs import PATH_TO_CAT_MAPPING + +import pandas as pd + +def cat_l1(): + corrected_label = { + 'Bênh viện chuyên khoa' : 'Bệnh viện chuyên khoa', + 'Công ty quy mô trung bình và lơn' : 'Công ty quy mô trung bình và lớn', + 'Công ty quy mô trung bình và lớn ' : 'Công ty quy mô trung bình và lớn', + 'Cột Nước Chữa Cháy (PCCC)' : '-', + 'Cột Nước Chữa Cháy(PCCC)' : '-', + 'Di tích văn hóa, lịch sử': 'Điểm du lịch nhân văn', + 'Dịch vụ xe máy (Sửa xe, bán xe)' : 'Dịch vụ xe máy', + 'Dịch vụ ô tô (Gara, showroom)' : 'Dịch vụ ô tô', + 'Giao lộ' : '-', + 'HĐND, UBND, Công sở': 'HĐND, UBND', + 'Khách sạn 3,4,5 sao ': 'Khách sạn 3,4,5 sao', + 'Ngân hàng': 'Ngân hàng thương mại', + 'Ngân hàng nhà nước': 'Ngân hàng nhà nước và chính sách', + 'Nhà văn hóa, trụ sở thôn, khu vực': 'Nhà văn hóa, điểm khai báo tạm trú tạm vắng', + 'Nhà văn hóa, trụ sở thôn, khu vực ': 'Nhà văn hóa, điểm khai báo tạm trú tạm vắng', + 'Nhà văn hóa, trụ sở thôn, khu vực ': 'Nhà văn hóa, điểm khai báo tạm trú tạm vắng', + 'Nhà văn hóa, trụ sở thôn,khu vực': 'Nhà văn hóa, điểm khai báo tạm trú tạm vắng', + 'Nhà đân độc lập': 'Nhà dân độc lập', + 'Phòng khám chuyên khoa ': 'Phòng khám chuyên khoa', + 'Trung tâm gia sư ': 'Trung tâm gia sư', + 'Trung tâm tin học ': 'Trung tâm tin học', + 'Quán cà phê, giải khát theo chuỗi ': 'Quán cà phê, giải khát theo chuỗi', + 
'Trung tâm thể dục thể thao': 'Trung tâm thể dục thể thao.', + 'Trung tâm tư vấn du học và giới thiệu việc làm ': 'Trung tâm tư vấn du học và giới thiệu việc làm', + 'Quán Bar, Karaoke, vũ trường, pub': 'Quán bar, karaoke, vũ trường, pub', + 'Trường tiểu học': 'Tiểu học', + 'Trường cao đẳng': 'Cao đẳng', + 'Điểm du lịch, Điểm vui chơi giải trí': 'Trung tâm vui chơi giải trí', # not sure + 'Nhà dòng': 'Nhà thờ Thiên chúa giáo', # not sure + 'Nhà công vụ': 'Nhà tập thể, Nhà trong khu tập thể', # not sure + 'Trạm cấp nước': '-' + } + + data = pd.read_excel(PATH_TO_CAT_MAPPING, engine='openpyxl') + + all_cat = data['Sort()'].tolist() + l1_cat = data['Merge_HungLV'].tolist() + + d = {'all_cat': all_cat, 'l1_cat': l1_cat} + d = pd.DataFrame(d) + d = d.dropna() + + dict_d = {} + for index, row in d.iterrows(): + if row['all_cat'] in corrected_label: + row_ = corrected_label[row['all_cat']] + else: + row_ = row['all_cat'] + label_ = row['l1_cat'].split(',') + for i in range(len(label_)): + label_[i] = label_[i].strip() + dict_d[row_] = label_ + + return dict_d diff --git a/src/ir/src/preprocessing/data_cleaning.py b/src/ir/src/preprocessing/data_cleaning.py new file mode 100644 index 0000000000000000000000000000000000000000..31d7e622d636a5c8bd7f289301f49f27f7396e42 --- /dev/null +++ b/src/ir/src/preprocessing/data_cleaning.py @@ -0,0 +1,98 @@ +from vncorenlp import VnCoreNLP +dict_map = { + "òa": "oà", + "Òa": "Oà", + "ÒA": "OÀ", + "óa": "oá", + "Óa": "Oá", + "ÓA": "OÁ", + "ỏa": "oả", + "Ỏa": "Oả", + "ỎA": "OẢ", + "õa": "oã", + "Õa": "Oã", + "ÕA": "OÃ", + "ọa": "oạ", + "Ọa": "Oạ", + "ỌA": "OẠ", + "òe": "oè", + "Òe": "Oè", + "ÒE": "OÈ", + "óe": "oé", + "Óe": "Oé", + "ÓE": "OÉ", + "ỏe": "oẻ", + "Ỏe": "Oẻ", + "ỎE": "OẺ", + "õe": "oẽ", + "Õe": "Oẽ", + "ÕE": "OẼ", + "ọe": "oẹ", + "Ọe": "Oẹ", + "ỌE": "OẸ", + "ùy": "uỳ", + "Ùy": "Uỳ", + "ÙY": "UỲ", + "úy": "uý", + "Úy": "Uý", + "ÚY": "UÝ", + "ủy": "uỷ", + "Ủy": "Uỷ", + "ỦY": "UỶ", + "ũy": "uỹ", + "Ũy": "Uỹ", + "ŨY": "UỸ", + "ụy": "uỵ", + "Ụy": "Uỵ", + "ỤY": "UỴ", +} + + +def replace_all(text, dict_map): + for i, j in dict_map.items(): + text = text.replace(i, j) + return text + + +def clean_text(text, + annotator, + sentence_seg=False): + # break sentences by
<br>
tag + text = text.strip() + text = replace_all(text, dict_map) + + # word segment + word_segmented_sentences = annotator.tokenize(text) + sentences = [" ".join(sent) for sent in word_segmented_sentences] + + data = [] + if sentence_seg: + for i, sent in enumerate(sentences): + # we must mask_emoji after tokenization because + # tokenizer might be misleading of emoji text + sent = sent.strip() + + data.append((sent, i)) + else: + text = " ".join(sentences) + text = text.strip() + + data.append((text, 0)) + + return data + + +def batch_clean_text(documents, + sentence_seg=False): + annotator = VnCoreNLP("./VnCoreNLP/VnCoreNLP-1.2.jar", + annotators="wseg", + max_heap_size='-Xmx500m') + data = [] + for text in documents: + sentences = clean_text(text, + annotator, + sentence_seg=sentence_seg) + for sent in sentences: + data.append(sent[0]) + + return data diff --git a/src/ir/src/preprocessing/main.py b/src/ir/src/preprocessing/main.py new file mode 100644 index 0000000000000000000000000000000000000000..f120c465140fc1f59bb3e55b2999dde7ddccebb4 --- /dev/null +++ b/src/ir/src/preprocessing/main.py @@ -0,0 +1,27 @@ +import pipeline_prepare_data + +import argparse + +def _parse_args(): + parser = argparse.ArgumentParser(description="POI classification based on tetx") + # model parameters + parser.add_argument('--data-path', type=str, default=None, + help='Path to data') + parser.add_argument('--out-path', type=str, default=None, + help='Path to output data') + + args = parser.parse_args() + return args + +def handle(args): + data_file = args.data_path + out_dir = args.out_path + + pipeline_prepare_data.preprocess_data(data_file, out_dir) + +def main(): + args = _parse_args() + handle(args) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/ir/src/preprocessing/pipeline_prepare_data.py b/src/ir/src/preprocessing/pipeline_prepare_data.py new file mode 100644 index 0000000000000000000000000000000000000000..62efa9d157fc3bd811a3bdaab32376620074c5d1 --- /dev/null +++ b/src/ir/src/preprocessing/pipeline_prepare_data.py @@ -0,0 +1,55 @@ +import os +import json +from vncorenlp import VnCoreNLP + +from cat_mapping import cat_l1 +from data_cleaning import clean_text # type: ignore +from configs import * # type: ignore + +def preprocess_data(in_file, out_dir): + with open(in_file, 'r', encoding="utf-8") as fin: + documents = json.load(fin) + + # instaniate a vncorenlp annotator + annotator = VnCoreNLP(VNCORENLP_PATH, + annotators="wseg", + max_heap_size='-Xmx500m') + + remove_repetition_char = False + remove_repetition_dash = False + + sentence_seg = False + field_id = "_id" + field_text = "name" + cat_level_1 = cat_l1() + data = [] + for doc in documents: + text = doc[field_text] + fid = doc[field_id] + + print(text) + + text = clean_text(text, + annotator=annotator, + remove_repetition_char=remove_repetition_char, + remove_repetition_dash=remove_repetition_dash, + sentence_seg=sentence_seg) + + # label = cat_level_1[doc["poicat"]] + for t in text: + item = { + "fid": fid, + "text": t[0], + "offset": t[1], + "name": doc["name"], + "poicat": doc["poicat"], + "label_l1": doc["label"] + } + data.append(item) + + basename = os.path.basename(in_file).split(".")[0] + out_file = os.path.join(out_dir, "%s_.json" % (basename)) + with open(out_file, 'w', encoding="utf-8") as fou: + json.dump(data, fou, ensure_ascii=False) + + return out_file diff --git a/src/ir/src/preprocessing/pre_process.py b/src/ir/src/preprocessing/pre_process.py new file mode 100644 index 
0000000000000000000000000000000000000000..b0fc6c508ba762b498e1bb6b64658f78c477a0ae --- /dev/null +++ b/src/ir/src/preprocessing/pre_process.py @@ -0,0 +1,111 @@ +import emoji as emoji # type: ignore + +import re +RE_EMOJI = re.compile(r'[\u263a-\U0001f973]') +RE_MENTION = re.compile(r']*>(.*?)') +RE_HTML_TAG = re.compile(r'<.*?>') +RE_HASHTAG = re.compile(r'#([^\W_]{1,50})', re.U) +RE_URL_1 = re.compile(r'http\S+') +RE_URL_2 = re.compile(r'www.+') +RE_REPETITION_CHAR = re.compile(r'([a-zA-ZÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚĂĐĨŨƠàáâãèéêìíòóôõùúăđĩũơƯĂẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼỀỀỂưăạảấầẩẫậắằẳẵặẹẻẽềềểỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪễệỉịọỏốồổỗộớờởỡợụủứừỬỮỰỲỴÝỶỸửữựỳỵỷỹ])\1*', re.U) +RE_REPETITION_CHAR_OLD = re.compile(r'([\w])\1*') +RE_REPETITION_DASH = re.compile(r'([-.])\1*', re.U) + + +def mask_emoji(text, + convert=False, + delimiters=(" ", " "), + language='vi'): + """Mask emoji from text. + + Args: + text (str): text contain emojies + convert (bool, optional): Convert emojies to text instead \ + of removing. Defaults to False. + language (str): Choose language of emoji name. Default to 'vi'. + delimiters (tuple, optional): Delimiter for emojies. \ + Used when convert=True. Default to ("", ""). + + Returns: + str: text without graphical emojies + """ + if convert: + text = emoji.demojize(text, delimiters=delimiters, language=language) + else: + text = emoji.get_emoji_regexp().sub(r'', text) + + return text.strip() + + +def mask_mention(text, repl_string="@USER"): + text = re.sub(RE_MENTION, repl_string, text) + return text.strip() + + +def mask_url(text, repl_string="HTTPURL"): + text = re.sub(RE_URL_1, repl_string, text) + text = re.sub(RE_URL_2, repl_string, text) + return text.strip() + + +def replace_br(text): + text = text.replace("
", ".") + return text + + +def remove_html(text): + text = re.sub(RE_HTML_TAG, r'', text) + return text.strip() + + +def remove_hashtag(text): + text = re.sub(RE_HASHTAG, r'', text) + return text.strip() + + +def remove_repetition(text, target="dash"): + if target == "dash": + text = re.sub(RE_REPETITION_DASH, r'\1', text) + else: + text = re.sub(RE_REPETITION_CHAR, r'\1', text) + return text + + +if __name__ == "__main__": + print( + mask_emoji(r'🎁TẶNG QUÀ & SĂN KHUYẾN MÃI TỚI 50% KHI XEM LIVESTREAM \ + CÙNG 7-ELEVEN🔥', True)) + print( + mask_emoji(r'🎁TẶNG QUÀ & SĂN KHUYẾN MÃI TỚI 50% KHI XEM LIVESTREAM \ + CÙNG 7-ELEVEN🔥')) + + print( + mask_mention(r'\ + Trân Phạm mai t giả bộ lên m làm bài r mua mấy này ăn')) + + print( + mask_mention(r'\ + Trân Phạm mai t giả bộ lên m làm bài r mua mấy này ăn', '')) + + print( + remove_html(r'😢 hok biết ly thuỷ tinh có về lại\ + hok Trân Phạm\ + ')) + + print(remove_hashtag(r'#love #yêu #3000 valentine hạnh phúc')) + print(mask_url("Xem them tai http://www.abc.xyz")) + print(mask_url("Xem them tai https://www.abc.xyz")) + print(mask_url("Xem them tai http://abc.xyz")) + print(mask_url("Xem them tai https://abc.xyz")) + print(mask_url("Xem them tai www.abc.xyz")) + + print(remove_repetition("Xeeeeeem điiiiiiiii", "char")) + print(remove_repetition("Cái xoong", "char")) + print(remove_repetition("yeeeeeeuuuuuuu yêêêu yêêu yêu 1000", "char")) + print(remove_repetition("--------------", "dash")) + + RE_REPETITION = re.compile(r'(- )\1*', re.U) + re.sub(RE_REPETITION, r'\1', "- - - - - - - - adsdads") diff --git a/src/ir/src/testing_pipeline.py b/src/ir/src/testing_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..8eb19c6482bb4c5a569cc8620bc9ff68af67a8c8 --- /dev/null +++ b/src/ir/src/testing_pipeline.py @@ -0,0 +1,89 @@ +import os +import hydra +from typing import List + +from omegaconf import DictConfig +from pytorch_lightning import LightningDataModule, LightningModule, Trainer, seed_everything +from pytorch_lightning.loggers import TensorBoardLogger + +from src.ir.src import utils +from src.ir.src.utils.evaluate import evaluate_predict + +from src.ir.src.datamodules.tc_datamodule import TCDataModule +from src.ir.src.models.tc_classification_task import TCClassificationTask +from src.ir.src.models.tc_classifier import TCClassifier + +log = utils.get_logger(__name__) + + +def test(config: DictConfig, input, output, img_path) -> None: + """Contains minimal example of the testing pipeline. Evaluates given checkpoint on a testset. + Args: + config (DictConfig): Configuration composed by Hydra. 
+ Returns: + None + """ + + # Set seed for random number generators in pytorch, numpy and python.random + if config.get("seed"): + seed_everything(config.seed, workers=True) + + # Convert relative ckpt path to absolute path if necessary + # if not os.path.isabs(config.ckpt_path): + # config.ckpt_path = os.path.join(hydra.utils.get_original_cwd(), config.ckpt_path) + + # Init lightning datamodule + log.info(f"Instantiating datamodule <{config.datamodule._target_}>") + # datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) + datamodule = TCDataModule(config.datamodule.tokenizer, + input, + config.datamodule.train_test_split, + config.datamodule.batch_size, + config.datamodule.num_workers, + config.datamodule.pin_memory, + config.datamodule.seed) + datamodule.setup(stage="predict") # test + + # Init lightning model + log.info(f"Instantiating model <{config.model.train._target_}>") + # model: LightningModule = hydra.utils.instantiate(config.model.train) + model = TCClassifier(config.model.train.model_name, + config.model.train.n_classes, + config.model.train.lr, + config.model.train.scheduler_type, + config.model.train.max_steps, + config.model.train.weight_decay, + config.model.train.classifier_dropout, + config.model.train.mixout, + config.model.train.freeze_encoder) + model = model.load_from_checkpoint(checkpoint_path=config.ckpt_path) + + log.info(f"Instantiating testing model <{config.model.test._target_}>") + # test_task: LightningModule = hydra.utils.instantiate(config.model.test, model=model) + test_task = TCClassificationTask(model, + config.model.test.function_to_apply, + config.model.test.return_all_scores) + + # Init lightning loggers + logger: List[TensorBoardLogger] = [] + if "logger" in config: + for _, lg_conf in config.logger.items(): + if "_target_" in lg_conf: + log.info(f"Instantiating logger <{lg_conf._target_}>") + logger.append(hydra.utils.instantiate(lg_conf)) + + # Init lightning trainer + log.info(f"Instantiating trainer <{config.trainer._target_}>") + trainer: Trainer = hydra.utils.instantiate(config.trainer, logger=logger) + + # Log hyperparameters + if trainer.logger: + trainer.logger.log_hyperparams({"ckpt_path": config.ckpt_path}) + + log.info("Starting testing!") + # trainer.test(model=test_task, datamodule=datamodule, verbose=True, ckpt_path=config.ckpt_path) + + results = trainer.predict(model=test_task, datamodule=datamodule) # ckpt_path=config.ckpt_path + path_to_output = os.path.join(output, img_path.split("/")[-1].split(".")[0]+".txt") + predicted = evaluate_predict(results, datamodule, path_to_output) + return predicted diff --git a/src/ir/src/training_pipeline.py b/src/ir/src/training_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..7a8eea6270f087846eff7770663e0cc93038a9ee --- /dev/null +++ b/src/ir/src/training_pipeline.py @@ -0,0 +1,122 @@ +import os +from typing import List, Optional + +import hydra +from omegaconf import DictConfig +from pytorch_lightning import ( + Callback, + LightningDataModule, + LightningModule, + Trainer, + seed_everything, +) +from pytorch_lightning.loggers import TensorBoardLogger + +from src import utils + +log = utils.get_logger(__name__) + + +def train(config: DictConfig) -> Optional[float]: + """Contains the training pipeline. Can additionally evaluate model on a testset, using best + weights achieved during training. + Args: + config (DictConfig): Configuration composed by Hydra. + Returns: + Optional[float]: Metric score for hyperparameter optimization. 
+ """ + + # Set seed for random number generators in pytorch, numpy and python.random + if config.get("seed"): + seed_everything(config.seed, workers=True) + + # Convert relative ckpt path to absolute path if necessary + ckpt_path = config.trainer.get("resume_from_checkpoint") + if ckpt_path and not os.path.isabs(ckpt_path): + config.trainer.resume_from_checkpoint = os.path.join( + hydra.utils.get_original_cwd(), ckpt_path + ) + + # Init lightning datamodule + log.info(f"Instantiating datamodule <{config.datamodule._target_}>") + datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) + datamodule.setup(stage="fit") + + # Init lightning model + log.info(f"Instantiating model <{config.model.train._target_}>") + model: LightningModule = hydra.utils.instantiate(config.model.train) + + # Init lightning callbacks + callbacks: List[Callback] = [] + if "callbacks" in config: + for _, cb_conf in config.callbacks.items(): + if "_target_" in cb_conf: + log.info(f"Instantiating callback <{cb_conf._target_}>") + callbacks.append(hydra.utils.instantiate(cb_conf)) + + # Init lightning loggers + logger: List[TensorBoardLogger] = [] + if "logger" in config: + for _, lg_conf in config.logger.items(): + if "_target_" in lg_conf: + log.info(f"Instantiating logger <{lg_conf._target_}>") + logger.append(hydra.utils.instantiate(lg_conf)) + + # Init lightning trainer + log.info(f"Instantiating trainer <{config.trainer._target_}>") + trainer: Trainer = hydra.utils.instantiate( + config.trainer, callbacks=callbacks, logger=logger, _convert_="partial" + ) + + # Send some parameters from config to all lightning loggers + log.info("Logging hyperparameters!") + utils.log_hyperparameters( + config=config, + model=model, + datamodule=datamodule, + trainer=trainer, + callbacks=callbacks, + logger=logger, + ) + + # Train the model + if config.get("train"): + log.info("Starting training!") + trainer.fit(model=model, datamodule=datamodule) + + # Get metric score for hyperparameter optimization + optimized_metric = config.get("optimized_metric") + if optimized_metric and optimized_metric not in trainer.callback_metrics: + raise Exception( + "Metric for hyperparameter optimization not found! " + "Make sure the `optimized_metric` in `hparams_search` config is correct!" 
+ ) + score = trainer.callback_metrics.get(optimized_metric) + + # Test the model + if config.get("test"): + log.info(f"Instantiating testing model <{config.model.test._target_}>") + test_task: LightningModule = hydra.utils.instantiate(config.model.test, model=model) + ckpt_path = "best" + if not config.get("train") or config.trainer.get("fast_dev_run"): + ckpt_path = None + log.info("Starting testing!") + trainer.test(model=test_task, datamodule=datamodule, ckpt_path=ckpt_path) + + # Make sure everything closed properly + log.info("Finalizing!") + utils.finish( + config=config, + model=model, + datamodule=datamodule, + trainer=trainer, + callbacks=callbacks, + logger=logger, + ) + + # Print path to best checkpoint + if not config.trainer.get("fast_dev_run") and config.get("train"): + log.info(f"Best model ckpt at {trainer.checkpoint_callback.best_model_path}") + + # Return metric score for hyperparameter optimization + return score \ No newline at end of file diff --git a/src/ir/src/utils/__init__.py b/src/ir/src/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d2c0969310d15fac4a8c09d006f00a91a86e4c6b --- /dev/null +++ b/src/ir/src/utils/__init__.py @@ -0,0 +1,160 @@ +import logging +import warnings +from typing import List, Sequence + +import pytorch_lightning as pl +import rich.syntax +import rich.tree +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning.utilities import rank_zero_only + + +def get_logger(name=__name__) -> logging.Logger: + """Initializes multi-GPU-friendly python command line logger.""" + + logger = logging.getLogger(name) + + # this ensures all logging levels get marked with the rank zero decorator + # otherwise logs would get multiplied for each GPU process in multi-GPU setup + for level in ( + "debug", + "info", + "warning", + "error", + "exception", + "fatal", + "critical", + ): + setattr(logger, level, rank_zero_only(getattr(logger, level))) + + return logger + + +log = get_logger(__name__) + + +def extras(config: DictConfig) -> None: + """Applies optional utilities, controlled by config flags. + Utilities: + - Ignoring python warnings + - Rich config printing + """ + + # disable python warnings if + if config.get("ignore_warnings"): + log.info("Disabling python warnings! ") + warnings.filterwarnings("ignore") + + # pretty print config tree using Rich library if + if config.get("print_config"): + log.info("Printing config tree with Rich! ") + print_config(config, resolve=True) + + +@rank_zero_only +def print_config( + config: DictConfig, + print_order: Sequence[str] = ( + "datamodule", + "model", + "callbacks", + "logger", + "trainer", + ), + resolve: bool = True, +) -> None: + """Prints content of DictConfig using Rich library and its tree structure. + Args: + config (DictConfig): Configuration composed by Hydra. + print_order (Sequence[str], optional): Determines in what order config components are printed. + resolve (bool, optional): Whether to resolve reference fields of DictConfig. 
+ """ + + style = "dim" + tree = rich.tree.Tree("CONFIG", style=style, guide_style=style) + + quee = [] + + for field in print_order: + quee.append(field) if field in config else log.info(f"Field '{field}' not found in config") + + for field in config: + if field not in quee: + quee.append(field) + + for field in quee: + branch = tree.add(field, style=style, guide_style=style) + + config_group = config[field] + if isinstance(config_group, DictConfig): + branch_content = OmegaConf.to_yaml(config_group, resolve=resolve) + else: + branch_content = str(config_group) + + branch.add(rich.syntax.Syntax(branch_content, "yaml")) + + rich.print(tree) + + with open("config_tree.log", "w") as file: + rich.print(tree, file=file) + + +@rank_zero_only +def log_hyperparameters( + config: DictConfig, + model: pl.LightningModule, + datamodule: pl.LightningDataModule, + trainer: pl.Trainer, + callbacks: List[pl.Callback], + logger: List[pl.loggers.TensorBoardLogger], +) -> None: + """Controls which config parts are saved by Lightning loggers. + Additionaly saves: + - number of model parameters + """ + + if not trainer.logger: + return + + hparams = {} + + # choose which parts of hydra config will be saved to loggers + hparams["model"] = config["model"] + + # save number of model parameters + hparams["model/params/total"] = sum(p.numel() for p in model.parameters()) + hparams["model/params/trainable"] = sum( + p.numel() for p in model.parameters() if p.requires_grad + ) + hparams["model/params/non_trainable"] = sum( + p.numel() for p in model.parameters() if not p.requires_grad + ) + + hparams["datamodule"] = config["datamodule"] + hparams["trainer"] = config["trainer"] + + if "seed" in config: + hparams["seed"] = config["seed"] + if "callbacks" in config: + hparams["callbacks"] = config["callbacks"] + + # send hparams to all loggers + trainer.logger.log_hyperparams(hparams) + + +def finish( + config: DictConfig, + model: pl.LightningModule, + datamodule: pl.LightningDataModule, + trainer: pl.Trainer, + callbacks: List[pl.Callback], + logger: List[pl.loggers.TensorBoardLogger], +) -> None: + """Makes sure everything closed properly.""" + + # without this sweeps with wandb logger might crash! 
+ for lg in logger: + if isinstance(lg, pl.loggers.WandbLogger): + import wandb + + wandb.finish() \ No newline at end of file diff --git a/src/ir/src/utils/__pycache__/__init__.cpython-38.pyc b/src/ir/src/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93816408e38ed07d26a090f6fff2d324bded9611 Binary files /dev/null and b/src/ir/src/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/ir/src/utils/__pycache__/evaluate.cpython-38.pyc b/src/ir/src/utils/__pycache__/evaluate.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a20fe981297fc5cf147d087999f99825feba0937 Binary files /dev/null and b/src/ir/src/utils/__pycache__/evaluate.cpython-38.pyc differ diff --git a/src/ir/src/utils/evaluate.py b/src/ir/src/utils/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..9c5445c4cc694e712d7133b73847c4667a199652 --- /dev/null +++ b/src/ir/src/utils/evaluate.py @@ -0,0 +1,71 @@ +import json +import numpy as np +import matplotlib.pyplot as plt +from sklearn.metrics import (average_precision_score, + precision_recall_curve, + PrecisionRecallDisplay, + classification_report) + + +def evaluate(results, datamodule, output): + y_pred = None + y_true = None + for preds, labels in results: + if y_pred is None: + y_pred = preds + else: + y_pred = np.append(y_pred, preds, axis=0) + + if y_true is None: + y_true = labels + else: + y_true = np.append(y_true, labels, axis=0) + + # calculate metrics + mAP = average_precision_score(y_true.flatten(), y_pred.flatten()) * 100 + print("Evaluation mAP: %.2f" % mAP) + + print(classification_report(np.argmax(y_true, axis=1), np.argmax( + y_pred, axis=1), zero_division=1, target_names=datamodule.mlb.label)) + + # display precision-recall curve + precision, recall, _ = precision_recall_curve(y_true.flatten(), y_pred.flatten()) + disp = PrecisionRecallDisplay(precision=precision, recall=recall) + disp.plot() + plt.show() + + # convert to label name + y_true = datamodule.mlb.inverse_transform(y_true) + + threshold = 0.5 + y_pred = np.where(y_pred >= threshold, 1, 0) + y_pred = datamodule.mlb.inverse_transform(y_pred) + + with open("./evaluate.txt", 'w', encoding="utf-8") as f: + for index in range(len(y_pred)): + f.write(datamodule.test_dataset.text[index] + ", " + + str(y_true[index]) + ", " + str(y_pred[index]) + "\n") + return y_true, y_pred + + +def evaluate_predict(results, datamodule, output, threshold = 0.5): + data = {} + y_pred = None + + if results is None: + return data + for preds in results: + if y_pred is None: + y_pred = preds + else: + y_pred = np.append(y_pred, preds, axis=0) + + y_pred = np.where(y_pred >= threshold, 1, 0) + y_pred = datamodule.mlb.inverse_transform(y_pred) + + with open(output, 'w', encoding="utf-8") as f: + for index in range(len(y_pred)): + data[datamodule.test_dataset.text[index]] = str(y_pred[index]) + json.dump(data, f, ensure_ascii=False) + + return data diff --git a/src/ir/src/vendor/__init__.py b/src/ir/src/vendor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5e99c3ba17a192caf22b016d2edb46b4e1e48c3d --- /dev/null +++ b/src/ir/src/vendor/__init__.py @@ -0,0 +1 @@ +# use this folder for storing third party code that cannot be installed using pip/conda \ No newline at end of file diff --git a/src/ir/test.py b/src/ir/test.py new file mode 100644 index 0000000000000000000000000000000000000000..d882261a8637148ad7a35951dff0df273268de1d --- /dev/null +++ b/src/ir/test.py 
@@ -0,0 +1,28 @@ +import sys +sys.path.insert(0, 'src/ir') +import dotenv +import hydra +from omegaconf import DictConfig + +# load environment variables from `.env` file if it exists +# recursively searches for `.env` in all folders starting from work dir +dotenv.load_dotenv(override=True) + + +@hydra.main(version_base = None, config_path="configs/", config_name="test.yaml") +def main(config: DictConfig): + + # Imports can be nested inside @hydra.main to optimize tab completion + # https://github.com/facebookresearch/hydra/issues/934 + from src import utils + from src.testing_pipeline import test + + # Applies optional utilities + utils.extras(config) + + # Evaluate model + return test(config) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/ir/tests/__init__.py b/src/ir/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/tests/helpers/__init__.py b/src/ir/tests/helpers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/tests/helpers/module_available.py b/src/ir/tests/helpers/module_available.py new file mode 100644 index 0000000000000000000000000000000000000000..8e983611b7b7c72f2d8c8852784ed5d592d310c6 --- /dev/null +++ b/src/ir/tests/helpers/module_available.py @@ -0,0 +1,27 @@ +import platform +from importlib.util import find_spec + +""" +Adapted from: + https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/utilities/imports.py +""" + + +def _module_available(module_path: str) -> bool: + """Check if a path is available in your environment. + >>> _module_available('os') + True + >>> _module_available('bla.bla') + False + """ + try: + return find_spec(module_path) is not None + except ModuleNotFoundError: + # Python 3.7+ + return False + + +_IS_WINDOWS = platform.system() == "Windows" +_DEEPSPEED_AVAILABLE = not _IS_WINDOWS and _module_available("deepspeed") +_FAIRSCALE_AVAILABLE = not _IS_WINDOWS and _module_available("fairscale.nn") +_RPC_AVAILABLE = not _IS_WINDOWS and _module_available("torch.distributed.rpc") \ No newline at end of file diff --git a/src/ir/tests/helpers/run_command.py b/src/ir/tests/helpers/run_command.py new file mode 100644 index 0000000000000000000000000000000000000000..18117152f9b11ee427532126e433f33a422e0553 --- /dev/null +++ b/src/ir/tests/helpers/run_command.py @@ -0,0 +1,15 @@ +from typing import List + +import pytest +import sh + + +def run_command(command: List[str]): + """Default method for executing shell commands with pytest.""" + msg = None + try: + sh.python(command) + except sh.ErrorReturnCode as e: + msg = e.stderr.decode() + if msg: + pytest.fail(msg=msg) \ No newline at end of file diff --git a/src/ir/tests/helpers/runif.py b/src/ir/tests/helpers/runif.py new file mode 100644 index 0000000000000000000000000000000000000000..3ebe2930af8f0b1c29619c2fc69eacd909ddda76 --- /dev/null +++ b/src/ir/tests/helpers/runif.py @@ -0,0 +1,101 @@ +import sys +from typing import Optional + +import pytest +import torch +from packaging.version import Version +from pkg_resources import get_distribution + +""" +Adapted from: + https://github.com/PyTorchLightning/pytorch-lightning/blob/master/tests/helpers/runif.py +""" + +from tests.helpers.module_available import ( + _DEEPSPEED_AVAILABLE, + _FAIRSCALE_AVAILABLE, + _IS_WINDOWS, + _RPC_AVAILABLE, +) + + +class RunIf: + """RunIf wrapper for conditional skipping of tests. 
+ Fully compatible with `@pytest.mark`. + Example: + @RunIf(min_torch="1.8") + @pytest.mark.parametrize("arg1", [1.0, 2.0]) + def test_wrapper(arg1): + assert arg1 > 0 + """ + + def __new__( + self, + min_gpus: int = 0, + min_torch: Optional[str] = None, + max_torch: Optional[str] = None, + min_python: Optional[str] = None, + skip_windows: bool = False, + rpc: bool = False, + fairscale: bool = False, + deepspeed: bool = False, + **kwargs, + ): + """ + Args: + min_gpus: min number of gpus required to run test + min_torch: minimum pytorch version to run test + max_torch: maximum pytorch version to run test + min_python: minimum python version required to run test + skip_windows: skip test for Windows platform + rpc: requires Remote Procedure Call (RPC) + fairscale: if `fairscale` module is required to run the test + deepspeed: if `deepspeed` module is required to run the test + kwargs: native pytest.mark.skipif keyword arguments + """ + conditions = [] + reasons = [] + + if min_gpus: + conditions.append(torch.cuda.device_count() < min_gpus) + reasons.append(f"GPUs>={min_gpus}") + + if min_torch: + torch_version = get_distribution("torch").version + conditions.append(Version(torch_version) < Version(min_torch)) + reasons.append(f"torch>={min_torch}") + + if max_torch: + torch_version = get_distribution("torch").version + conditions.append(Version(torch_version) >= Version(max_torch)) + reasons.append(f"torch<{max_torch}") + + if min_python: + py_version = ( + f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + ) + conditions.append(Version(py_version) < Version(min_python)) + reasons.append(f"python>={min_python}") + + if skip_windows: + conditions.append(_IS_WINDOWS) + reasons.append("does not run on Windows") + + if rpc: + conditions.append(not _RPC_AVAILABLE) + reasons.append("RPC") + + if fairscale: + conditions.append(not _FAIRSCALE_AVAILABLE) + reasons.append("Fairscale") + + if deepspeed: + conditions.append(not _DEEPSPEED_AVAILABLE) + reasons.append("Deepspeed") + + reasons = [rs for cond, rs in zip(conditions, reasons) if cond] + return pytest.mark.skipif( + condition=any(conditions), + reason=f"Requires: [{' + '.join(reasons)}]", + **kwargs, + ) \ No newline at end of file diff --git a/src/ir/tests/shell/__init__.py b/src/ir/tests/shell/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/tests/shell/test_basic_commands.py b/src/ir/tests/shell/test_basic_commands.py new file mode 100644 index 0000000000000000000000000000000000000000..5af6409c710edaf56678e3f2e55157be6e4c99bf --- /dev/null +++ b/src/ir/tests/shell/test_basic_commands.py @@ -0,0 +1,58 @@ +import pytest + +from tests.helpers.run_command import run_command +from tests.helpers.runif import RunIf + +""" +A couple of sanity checks to make sure the model doesn't crash with different running options. +""" + + +def test_fast_dev_run(): + """Test running for 1 train, val and test batch.""" + command = ["train.py", "++trainer.fast_dev_run=true"] + run_command(command) + + +@pytest.mark.slow +def test_cpu(): + """Test running 1 epoch on CPU.""" + command = ["train.py", "++trainer.max_epochs=1", "++trainer.gpus=0"] + run_command(command) + + +# use RunIf to skip execution of some tests, e.g. 
when no gpus are available +@RunIf(min_gpus=1) +@pytest.mark.slow +def test_gpu(): + """Test running 1 epoch on GPU.""" + command = [ + "train.py", + "++trainer.max_epochs=1", + "++trainer.gpus=1", + ] + run_command(command) + + +@RunIf(min_gpus=1) +@pytest.mark.slow +def test_mixed_precision(): + """Test running 1 epoch with pytorch native automatic mixed precision (AMP).""" + command = [ + "train.py", + "++trainer.max_epochs=1", + "++trainer.gpus=1", + "++trainer.precision=16", + ] + run_command(command) + + +@pytest.mark.slow +def test_double_validation_loop(): + """Test running 1 epoch with validation loop twice per epoch.""" + command = [ + "train.py", + "++trainer.max_epochs=1", + "++trainer.val_check_interval=0.5", + ] + run_command(command) \ No newline at end of file diff --git a/src/ir/tests/shell/test_debug_configs.py b/src/ir/tests/shell/test_debug_configs.py new file mode 100644 index 0000000000000000000000000000000000000000..7f76649aea12b4f6a174c3a7307257cf21de183f --- /dev/null +++ b/src/ir/tests/shell/test_debug_configs.py @@ -0,0 +1,35 @@ +import pytest + +from tests.helpers.run_command import run_command + + +@pytest.mark.slow +def test_debug_default(): + command = ["train.py", "debug=default"] + run_command(command) + + +def test_debug_limit_batches(): + command = ["train.py", "debug=limit_batches"] + run_command(command) + + +def test_debug_overfit(): + command = ["train.py", "debug=overfit"] + run_command(command) + + +@pytest.mark.slow +def test_debug_profiler(): + command = ["train.py", "debug=profiler"] + run_command(command) + + +def test_debug_step(): + command = ["train.py", "debug=step"] + run_command(command) + + +def test_debug_test_only(): + command = ["train.py", "debug=test_only"] + run_command(command) \ No newline at end of file diff --git a/src/ir/tests/shell/test_sweeps.py b/src/ir/tests/shell/test_sweeps.py new file mode 100644 index 0000000000000000000000000000000000000000..0b62a6f298fe8645a765abab4be52a11ed1c3f6c --- /dev/null +++ b/src/ir/tests/shell/test_sweeps.py @@ -0,0 +1,43 @@ +import pytest + +from tests.helpers.run_command import run_command + +""" +A couple of tests executing hydra sweeps. 
+Use the following command to skip slow tests: + pytest -k "not slow" +""" + + +@pytest.mark.slow +def test_experiments(): + """Test running all available experiment configs for 1 epoch.""" + command = ["train.py", "-m", "experiment=glob(*)", "++trainer.max_epochs=1"] + run_command(command) + + +@pytest.mark.slow +def test_default_sweep(): + """Test default Hydra sweeper.""" + command = [ + "train.py", + "-m", + "datamodule.batch_size=64,128", + "model.lr=0.01,0.02", + "trainer=default", + "++trainer.fast_dev_run=true", + ] + run_command(command) + + +@pytest.mark.slow +def test_optuna_sweep(): + """Test Optuna sweeper.""" + command = [ + "train.py", + "-m", + "hparams_search=mnist_optuna", + "trainer=default", + "++trainer.fast_dev_run=true", + ] + run_command(command) \ No newline at end of file diff --git a/src/ir/tests/unit/__init__.py b/src/ir/tests/unit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ir/tests/unit/test_mnist_datamodule.py b/src/ir/tests/unit/test_mnist_datamodule.py new file mode 100644 index 0000000000000000000000000000000000000000..87b5af773e936c3ecbc8ddc79bbe01036354aeeb --- /dev/null +++ b/src/ir/tests/unit/test_mnist_datamodule.py @@ -0,0 +1,37 @@ +import os + +import pytest +import torch + +from src.datamodules.mnist_datamodule import MNISTDataModule + + +@pytest.mark.parametrize("batch_size", [32, 128]) +def test_mnist_datamodule(batch_size): + datamodule = MNISTDataModule(batch_size=batch_size) + datamodule.prepare_data() + + assert not datamodule.data_train and not datamodule.data_val and not datamodule.data_test + + assert os.path.exists(os.path.join("data", "MNIST")) + assert os.path.exists(os.path.join("data", "MNIST", "raw")) + + datamodule.setup() + + assert datamodule.data_train and datamodule.data_val and datamodule.data_test + assert ( + len(datamodule.data_train) + len(datamodule.data_val) + len(datamodule.data_test) == 70_000 + ) + + assert datamodule.train_dataloader() + assert datamodule.val_dataloader() + assert datamodule.test_dataloader() + + batch = next(iter(datamodule.train_dataloader())) + x, y = batch + + assert len(x) == batch_size + assert len(y) == batch_size + assert x.dtype == torch.float32 + assert y.dtype == torch.int64 + \ No newline at end of file diff --git a/src/ir/train.py b/src/ir/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8666977f57c2d592ae0ea282d4d068f1d399f361 --- /dev/null +++ b/src/ir/train.py @@ -0,0 +1,26 @@ +import dotenv +import hydra +from omegaconf import DictConfig + +# load environment variables from `.env` file if it exists +# recursively searches for `.env` in all folders starting from work dir +dotenv.load_dotenv(override=True) + + +@hydra.main(version_base=None, config_path="configs/", config_name="train.yaml") +def main(config: DictConfig): + + # Imports can be nested inside @hydra.main to optimize tab completion + # https://github.com/facebookresearch/hydra/issues/934 + from src import utils + from src.training_pipeline import train + + # Applies optional utilities + utils.extras(config) + + # Train model + return train(config) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/ir/utils/.gitkeep b/src/ir/utils/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/ss/datasets_signboard_detection/datamodule.py b/src/ss/datasets_signboard_detection/datamodule.py new file 
mode 100644 index 0000000000000000000000000000000000000000..6446b9c1f82d53eef10fd96e206a7e14d89164b8 --- /dev/null +++ b/src/ss/datasets_signboard_detection/datamodule.py @@ -0,0 +1,40 @@ +import pytorch_lightning as pl +from torch.utils.data import DataLoader +from torchvision import transforms +from src.ss.datasets_signboard_detection.dataset import PoIDataset +import src.ss.datasets_signboard_detection.utils as utils + + +class POIDataModule(pl.LightningDataModule): + def __init__(self, + data_path: str, + train_batch_size=8, + test_batch_size=8, + seed=28): + super().__init__() + self.data_path = data_path + self.train_batch_size = train_batch_size + self.test_batch_size = test_batch_size + self.seed = seed + + def prepare_data(self): + pass + + def setup(self, stage="fit"): + transform = [transforms.ToTensor()] + test_transform = transforms.Compose(transform) + if stage == "predict" or stage is None: + self.test_dataset = PoIDataset(self.data_path, + transforms=test_transform) + + def predict_dataloader(self): + if self.test_dataset is not None: + return DataLoader(self.test_dataset, + batch_size=self.test_batch_size, + shuffle=False, + num_workers=16, + collate_fn=utils.collate_fn) + + def _get_name(filepath): + images = filepath + return images diff --git a/src/ss/datasets_signboard_detection/dataset.py b/src/ss/datasets_signboard_detection/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..39f0a030278ed98e876234157b09961b68e91300 --- /dev/null +++ b/src/ss/datasets_signboard_detection/dataset.py @@ -0,0 +1,36 @@ +from PIL import Image +from torch.utils.data import Dataset + + +class Labelizer(): + def __init__(self): + super().__init__() + self.labels = {'background': 0, 'bien': 1} + self.inv_labels = {0: 'background', 1: 'bien'} + + def transform(self, label): + return self.labels[label] + + def inverse_transform(self, ys): + return self.inv_labels(ys) + + def num_classes(self): + return len(self.labels) + + +class PoIDataset(Dataset): + def __init__(self, + data_path, + transforms=None): + self.data_path = data_path + self.transforms = transforms + + def __len__(self): + return len(self.data_path) + + def __getitem__(self, idx): + image = Image.open(self.data_path[idx]).convert('RGB') + target = {} + if self.transforms is not None: + image = self.transforms(image) + return image, target diff --git a/src/ss/datasets_signboard_detection/utils.py b/src/ss/datasets_signboard_detection/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..82ae79bc3fba0e9968c247a17de708e16b764068 --- /dev/null +++ b/src/ss/datasets_signboard_detection/utils.py @@ -0,0 +1,324 @@ +from collections import defaultdict, deque +import datetime +import pickle +import time + +import torch +import torch.distributed as dist + +import errno +import os + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): + + def f(x): + if x >= warmup_iters: + return 1 + alpha = float(x) / warmup_iters + return warmup_factor * (1 - alpha) + alpha + + return torch.optim.lr_scheduler.LambdaLR(optimizer, f) + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, 
**kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) diff --git a/src/ss/det_models/backbone.py b/src/ss/det_models/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..8d4163adfe14dc3e47a3228cda50d02a70809f3e --- /dev/null +++ b/src/ss/det_models/backbone.py @@ -0,0 +1,36 @@ +import torchvision.models.detection as models +from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor + +def set_parameter_requires_grad(model, + tune_only: bool = False): + if tune_only: + for child in list(model.children()): + for param in child.parameters(): + param.requires_grad = False + + +def initialize_model(model_name: str, + num_classes: int, + tune_only: bool = False, + use_pretrained: bool = True): + input_size = 0 + + model = getattr(models, model_name, lambda: None) + model_ft = model(pretrained=use_pretrained) + set_parameter_requires_grad(model_ft, tune_only) + + if model_name.startswith("maskrcnn"): + mask_predictor_in_channels = 256 + mask_dim_reduced = 256 + model_ft.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels, mask_dim_reduced, num_classes) + + elif model_name.startswith("fasterrcnn"): + from torchvision.models.detection.faster_rcnn import FastRCNNPredictor + # get number of input features for the classifier + in_features = model_ft.roi_heads.box_predictor.cls_score.in_features + model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + + else: + raise ValueError("{0} is not supported!".format(model_name)) + + return model_ft, input_size \ No newline at end of file diff --git a/src/ss/det_models/inference_signboard_detection.py b/src/ss/det_models/inference_signboard_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..88ca50ab4a813afe2953b1c8498067b64cacc96a --- /dev/null +++ b/src/ss/det_models/inference_signboard_detection.py @@ -0,0 +1,75 @@ +import pytorch_lightning as pl +import numpy as np +from PIL import Image +from src.ss.datasets_signboard_detection.dataset import Labelizer + +class Color_convert(): + def __init__(self): + super().__init__() + self.labels = {'bien': "red"} + + 
def transform(self, label): + return self.labels[label] + + def num_classes(self): + return len(self.labels) + +def compose(output, mask): + w,h = mask.shape + + for i in range(0, w): + for j in range(0,h): + if (mask[i,j] > 0.5): + output[i,j] = 1 + return output + +class POIDetectionTask(pl.LightningModule): + def __init__(self, + model, + score): + super().__init__() + + self.model = model + self.output = [] + self.score = score + self.labelizer = Labelizer() + self.color_convert = Color_convert() + + def forward(self, x): + output = self.model(x) + return output + + def predict_step(self, test_batch, batch_idx): + images, targets = test_batch + outputs = self(images) + for target in outputs: + shape = target['boxes'] + masks = target['masks'] + scores = target['scores'] + labels = target['labels'] + shape = shape.cpu().numpy() + masks = masks.cpu().numpy() + scores = scores.cpu().numpy() + labels = labels.cpu().numpy() + select_shape = [] + select_masks = [] + select_scores = [] + select_labels = [] + for i in range(len(scores)): + if (scores[i]>self.score): + select_shape.append(shape[i]) + select_masks.append(masks[i]) + select_scores.append(scores[i]) + select_labels.append(labels[i]) + + output = { + 'boxes': np.array(select_shape, dtype=np.int32).tolist(), + # 'masks': np.array(select_masks, dtype=np.uint8).tolist(), + 'scores': np.array(select_scores, dtype=np.float32).tolist(), + 'labels': np.array(select_labels, dtype=np.int32).tolist() + } + + self.output.append(output) + + def on_predict_end(self): + self.output = self.output \ No newline at end of file diff --git a/src/ss/det_models/model.py b/src/ss/det_models/model.py new file mode 100644 index 0000000000000000000000000000000000000000..e08eee09e1d775a6c7f4db83ee0b67d88fe3bac4 --- /dev/null +++ b/src/ss/det_models/model.py @@ -0,0 +1,21 @@ +import pytorch_lightning as pl +from src.ss.det_models.backbone import initialize_model + +class POIDetection(pl.LightningModule): + def __init__(self, + n_classes, + **kwargs): + super().__init__() + self.save_hyperparameters() + self.model, _ = initialize_model(kwargs["backbone"], + n_classes, + tune_only=kwargs["tune_fc_only"]) + + def forward(self, images, targets=None): + images = list(image for image in images) + if targets is not None : + targets = [{k: v for k, v in t.items()} for t in targets] + outputs = self.model(images, targets) + else: + outputs = self.model(images) + return outputs diff --git a/src/ss/main.py b/src/ss/main.py new file mode 100644 index 0000000000000000000000000000000000000000..d8ecd4f18cbe746b4dc8a041589d2312923521a1 --- /dev/null +++ b/src/ss/main.py @@ -0,0 +1,62 @@ +from signboard_detect import inference_signboard +import os +import numpy as np +import argparse +import tqdm +import cv2 + + +def get_parser(): + parser = argparse.ArgumentParser(description="Signboard Detection") + + parser.add_argument("--input", + type=str, + default="./images", + help="A list of space separated input images") + parser.add_argument("--output", + type=str, + default="./output/output_signboard", + help="A list of array of segmentation") + parser.add_argument("--checkpoint", + type=str, + default="./checkpoints/ss/ss.ckpt", + help="File path to best model checkpoint") + + args = parser.parse_args() + return args + + +def handle(args): + if args.input: + if os.path.isdir(args.input): + args.input = [os.path.join(args.input, fname) + for fname in os.listdir(args.input)] + + for path in tqdm.tqdm(args.input): + print(path) + img = cv2.imread(path) + dimensions = img.shape + 
hei, wid = dimensions[0], dimensions[1] + print(hei, wid) + segment_array = inference_signboard(path, args.checkpoint).astype(int) + h, w = segment_array.shape + print(h, w) + if hei == h and wid == w: + segment_array = segment_array + else: + segment_array = cv2.rotate( + segment_array, cv2.ROTATE_90_CLOCKWISE) + txt_name = str(path.split("/")[-1].split(".")[0]) + '.txt' + if args.output: + output_path = os.path.join(args.output, txt_name) + + np.savetxt(output_path, segment_array) + + +def main(): + args = get_parser() + handle(args) + + +if __name__ == "__main__": + main() diff --git a/src/ss/requirements.txt b/src/ss/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ba9e64c715529ef1c1bddb9263954f2a1104e35 --- /dev/null +++ b/src/ss/requirements.txt @@ -0,0 +1 @@ +pytorch-lightning \ No newline at end of file diff --git a/src/ss/signboard_detect.py b/src/ss/signboard_detect.py new file mode 100644 index 0000000000000000000000000000000000000000..253e3f61523146a29f595aad337c69a9f8c215e2 --- /dev/null +++ b/src/ss/signboard_detect.py @@ -0,0 +1,49 @@ +import pytorch_lightning as pl +from src.ss.det_models.model import POIDetection +from src.ss.datasets_signboard_detection.datamodule import POIDataModule +from src.ss.det_models.inference_signboard_detection import POIDetectionTask + + +def load_model(checkpoint_path): + model = POIDetection.load_from_checkpoint(checkpoint_path=checkpoint_path) + return model + + +def inference_signboard(image_path, checkpoint, score): + + dm = POIDataModule(data_path=image_path, + seed=42) + dm.setup("predict") + + model = load_model(checkpoint) + from src.ss.det_models.inference_signboard_detection import POIDetectionTask + task = POIDetectionTask(model, + data_path=image_path, + score=score) + + # accelerator='gpu', devices=1 + trainer = pl.Trainer(gpus=1, + max_epochs=-1) + trainer.predict(task, datamodule=dm) + return task.output + + +class SignBoardDetector(): + def __init__(self, + checkpoint) -> None: + self.model = POIDetection.load_from_checkpoint( + checkpoint_path=checkpoint) + + def inference_signboard(self, image, score): + dm = POIDataModule(data_path=image, + seed=42) + dm.setup("predict") + + task = POIDetectionTask(self.model, + score=score) + + trainer = pl.Trainer(gpus=1, + max_epochs=-1) + trainer.predict(task, datamodule=dm) + return task.output + diff --git a/src/ss/ss.py b/src/ss/ss.py new file mode 100644 index 0000000000000000000000000000000000000000..a2ef4b5e1ed4d720fae1d32756a69ad319746d6a --- /dev/null +++ b/src/ss/ss.py @@ -0,0 +1,26 @@ +from src.ss.signboard_detect import inference_signboard +import os +import numpy as np +import cv2 + + +def handle_ss(input, output): + checkpoint = "./checkpoints/ss/ss.ckpt" + if input: + img = cv2.imread(input) + dimensions = img.shape + hei, wid = dimensions[0], dimensions[1] + segment_array = inference_signboard(input, checkpoint).astype(int) + h, w = segment_array.shape + if hei == h and wid == w: + segment_array = segment_array + else: + segment_array = cv2.rotate( + segment_array, cv2.ROTATE_90_CLOCKWISE) + txt_name = str(input.split("/")[-1].split(".")[0]) + '.txt' + if output: + output_path = os.path.join(output, txt_name) + + np.savetxt(output_path, segment_array) + + return output_path, segment_array diff --git a/src/sts/GETTING_STARTED.md b/src/sts/GETTING_STARTED.md new file mode 100644 index 0000000000000000000000000000000000000000..3bdbc9ffaaa90cbf2f68423dde106934e2004d13 --- /dev/null +++ b/src/sts/GETTING_STARTED.md @@ -0,0 +1,79 @@ +## 
Getting Started with Detectron2 + +This document provides a brief intro of the usage of builtin command-line tools in detectron2. + +For a tutorial that involves actual coding with the API, +see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) +which covers how to run inference with an +existing model, and how to train a builtin model on a custom dataset. + + +### Inference Demo with Pre-trained Models + +1. Pick a model and its config file from + [model zoo](MODEL_ZOO.md), + for example, `mask_rcnn_R_50_FPN_3x.yaml`. +2. We provide `demo.py` that is able to demo builtin configs. Run it with: +``` +cd demo/ +python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ + --input input1.jpg input2.jpg \ + [--other-options] + --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl +``` +The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation. +This command will run the inference and show visualizations in an OpenCV window. + +For details of the command line arguments, see `demo.py -h` or look at its source code +to understand its behavior. Some common arguments are: +* To run __on your webcam__, replace `--input files` with `--webcam`. +* To run __on a video__, replace `--input files` with `--video-input video.mp4`. +* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`. +* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. + + +### Training & Evaluation in Command Line + +We provide two scripts in "tools/plain_train_net.py" and "tools/train_net.py", +that are made to train all the configs provided in detectron2. You may want to +use it as a reference to write your own training script. + +Compared to "train_net.py", "plain_train_net.py" supports fewer default +features. It also includes fewer abstraction, therefore is easier to add custom +logic. + +To train a model with "train_net.py", first +setup the corresponding datasets following +[datasets/README.md](./datasets/README.md), +then run: +``` +cd tools/ +./train_net.py --num-gpus 8 \ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +``` + +The configs are made for 8-GPU training. +To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.: +``` +./train_net.py \ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ + --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 +``` + +To evaluate a model's performance, use +``` +./train_net.py \ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ + --eval-only MODEL.WEIGHTS /path/to/checkpoint_file +``` +For more options, see `./train_net.py -h`. + +### Use Detectron2 APIs in Your Code + +See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) +to learn how to use detectron2 APIs to: +1. run inference with an existing model +2. train a builtin model on a custom dataset + +See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects) +for more ways to build your project on detectron2. 
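
As a rough illustration of the API usage described above, a minimal inference sketch with a builtin config might look like the following. This is a hedged example, not part of the original walkthrough: the config name, score threshold, and image path are placeholders.

```python
# Minimal sketch: run inference with a builtin model via the detectron2 APIs.
# The config name, threshold, and image path below are illustrative only.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # keep predictions above this score
# cfg.MODEL.DEVICE = "cpu"                   # uncomment to run without a GPU

predictor = DefaultPredictor(cfg)
image = cv2.imread("input1.jpg")             # BGR image, as DefaultPredictor expects
outputs = predictor(image)
print(outputs["instances"].pred_classes)
print(outputs["instances"].pred_boxes)
```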
diff --git a/src/sts/INSTALL.md b/src/sts/INSTALL.md new file mode 100644 index 0000000000000000000000000000000000000000..9b83ec93524d60fbe442c46c642022b0ab9e534e --- /dev/null +++ b/src/sts/INSTALL.md @@ -0,0 +1,257 @@ +## Installation + +### Requirements +- Linux or macOS with Python ≥ 3.6 +- PyTorch ≥ 1.6 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. + Install them together at [pytorch.org](https://pytorch.org) to make sure of this +- OpenCV is optional but needed by demo and visualization + + +### Build Detectron2 from Source + +gcc & g++ ≥ 5.4 are required. [ninja](https://ninja-build.org/) is recommended for faster build. +After having them, run: +``` +python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' +# (add --user if you don't have permission) + +# Or, to install it from a local clone: +git clone https://github.com/facebookresearch/detectron2.git +python -m pip install -e detectron2 + +# On macOS, you may need to prepend the above commands with a few environment variables: +CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install ... +``` + +To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the +old build first. You often need to rebuild detectron2 after reinstalling PyTorch. + +### Install Pre-Built Detectron2 (Linux only) + +Choose from this table to install [v0.4 (Mar 2021)](https://github.com/facebookresearch/detectron2/releases): + +
| CUDA | torch 1.8 | torch 1.7 | torch 1.6 |
| --- | --- | --- | --- |
| 11.1 | `cu111/torch1.8` | | |
| 11.0 | | `cu110/torch1.7` | |
| 10.2 | `cu102/torch1.8` | `cu102/torch1.7` | `cu102/torch1.6` |
| 10.1 | `cu101/torch1.8` | `cu101/torch1.7` | `cu101/torch1.6` |
| 9.2 | | `cu92/torch1.7` | `cu92/torch1.6` |
| cpu | `cpu/torch1.8` | `cpu/torch1.7` | `cpu/torch1.6` |

Each cell names the wheel index to pass to pip. For example, for CUDA 11.1 with torch 1.8, run `python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.8/index.html`; for the other supported combinations, replace `cu111/torch1.8` with the corresponding entry from the table.
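
To pick the right wheel from the table above, first confirm which PyTorch and CUDA builds are installed. A quick check (an added convenience, not part of the original instructions):

```python
import torch
# Prints e.g. "1.8.0 10.2", which would point to the cu102/torch1.8 wheel above.
print(torch.__version__, torch.version.cuda)
```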
+ + +Note that: +1. The pre-built packages have to be used with corresponding version of CUDA and the official package of PyTorch. + Otherwise, please build detectron2 from source. +2. New packages are released every few months. Therefore, packages may not contain latest features in the master + branch and may not be compatible with the master branch of a research project that uses detectron2 + (e.g. those in [projects](projects)). + +### Common Installation Issues + +Click each issue for its solutions: + +

+ +Undefined symbols that contains TH,aten,torch,caffe2; Missing torch dynamic libraries; Segmentation fault immediately when using detectron2. + +
+ +This usually happens when detectron2 or torchvision is not +compiled with the version of PyTorch you're running. + +If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them +following [pytorch.org](http://pytorch.org). So the versions will match. + +If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases) +to see the corresponding pytorch version required for each pre-built detectron2. +Or uninstall and reinstall the correct pre-built detectron2. + +If the error comes from detectron2 or torchvision that you built manually from source, +remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment. + +If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env` +in your issue. +
+ +
+ +Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found. + +
+Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime. + +This often happens with old anaconda. +Try `conda update libgcc`. Then rebuild detectron2. + +The fundamental solution is to run the code with proper C++ runtime. +One way is to use `LD_PRELOAD=/path/to/libstdc++.so`. + +
+ +
+ +"nvcc not found" or "Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available". + +
+CUDA is not found when building detectron2. +You should make sure + +``` +python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)' +``` + +print `(True, a directory with cuda)` at the time you build detectron2. + +Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config. +
+ +
+ +"invalid device function" or "no kernel image is available for execution". + +
+Two possibilities: + +* You build detectron2 with one version of CUDA but run it with a different version. + + To check whether it is the case, + use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. + In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" + to contain cuda libraries of the same version. + + When they are inconsistent, + you need to either install a different build of PyTorch (or build by yourself) + to match your local CUDA installation, or install a different version of CUDA to match PyTorch. + +* PyTorch/torchvision/Detectron2 is not built for the correct GPU SM architecture (aka. compute capability). + + The architecture included by PyTorch/detectron2/torchvision is available in the "architecture flags" in + `python -m detectron2.utils.collect_env`. It must include + the architecture of your GPU, which can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus). + + If you're using pre-built PyTorch/detectron2/torchvision, they have included support for most popular GPUs already. + If not supported, you need to build them from source. + + When building detectron2/torchvision from source, they detect the GPU device and build for only the device. + This means the compiled code may not work on a different GPU device. + To recompile them for the correct architecture, remove all installed/compiled files, + and rebuild them with the `TORCH_CUDA_ARCH_LIST` environment variable set properly. + For example, `export TORCH_CUDA_ARCH_LIST="6.0;7.0"` makes it compile for both P100s and V100s. +
+ +
+ +Undefined CUDA symbols; Cannot open libcudart.so + +
+The version of NVCC you use to build detectron2 or torchvision does +not match the version of CUDA you are running with. +This often happens when using anaconda's CUDA runtime. + +Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. +In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" +to contain cuda libraries of the same version. + +When they are inconsistent, +you need to either install a different build of PyTorch (or build by yourself) +to match your local CUDA installation, or install a different version of CUDA to match PyTorch. +
+ + +
+ +C++ compilation errors from NVCC / NVRTC; "Unsupported gpu architecture" + + +A few possibilities: + +1. Local CUDA/NVCC version has to match the CUDA version of your PyTorch. Both can be found in `python collect_env.py`. + When they are inconsistent, you need to either install a different build of PyTorch (or build by yourself) + to match your local CUDA installation, or install a different version of CUDA to match PyTorch. + +2. Local CUDA/NVCC version shall support the SM architecture (a.k.a. compute capability) of your GPU. + The capability of your GPU can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus). + The capability supported by NVCC is listed at [here](https://gist.github.com/ax3l/9489132). + If your NVCC version is too old, this can be workaround by setting environment variable + `TORCH_CUDA_ARCH_LIST` to a lower, supported capability. + +3. The combination of NVCC and GCC you use is incompatible. You need to change one of their versions. + See [here](https://gist.github.com/ax3l/9489132) for some valid combinations. + Notably, CUDA<=10.1.105 doesn't support GCC>7.3. + + The CUDA/GCC version used by PyTorch can be found by `print(torch.__config__.show())`. + +
+ + +
+ +"ImportError: cannot import name '_C'". + +
+Please build and install detectron2 following the instructions above. + +Or, if you are running code from detectron2's root directory, `cd` to a different one. +Otherwise you may not import the code that you installed. +
+ + +
+ +Any issue on windows. + +
+Detectron2 is continuously built on Windows with [CircleCI](https://app.circleci.com/pipelines/github/facebookresearch/detectron2?branch=master). +However, we do not provide official support for it. +PRs that improve code compatibility on Windows are welcome. +
+ +
+ +ONNX conversion segfault after some "TraceWarning". + +
+The ONNX package is compiled with a too old compiler. + +Please build and install ONNX from its source code using a compiler +whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`). +
+ + +
+ +"library not found for -lstdc++" on older version of MacOS + +
+See [this stackoverflow answer](https://stackoverflow.com/questions/56083725/macos-build-issues-lstdc-not-found-while-building-python-package). +
+ + +### Installation inside specific environments: + +* __Colab__: see our [Colab Tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) + which has step-by-step instructions. + +* __Docker__: The official [Dockerfile](docker) installs detectron2 with a few simple commands. + diff --git a/src/sts/MODEL_ZOO.md b/src/sts/MODEL_ZOO.md new file mode 100644 index 0000000000000000000000000000000000000000..f34733b95dd06e094435c3be9cb3a5515750cac7 --- /dev/null +++ b/src/sts/MODEL_ZOO.md @@ -0,0 +1,906 @@ +# Detectron2 Model Zoo and Baselines + +## Introduction + +This file documents a large collection of baselines trained +with detectron2 in Sep-Oct, 2019. +All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/) +servers with 8 NVIDIA V100 GPUs & NVLink. The speed numbers are periodically updated with latest PyTorch/CUDA/cuDNN versions. +You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs. + +In addition to these official baseline models, you can find more models in [projects/](projects/). + +#### How to Read the Tables +* The "Name" column contains a link to the config file. Running `tools/train_net.py --num-gpus 8` with this config file + will reproduce the model. +* Training speed is averaged across the entire training. + We keep updating the speed with latest version of detectron2/pytorch/etc., + so they might be different from the `metrics` file. + Training speed for multi-machine jobs is not provided. +* Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset), + with batch size 1 in detectron2 directly. + Measuring it with custom code may introduce other overhead. + Actual deployment in production should in general be faster than the given inference + speed due to more optimizations. +* The *model id* column is provided for ease of reference. + To check downloaded file integrity, any model on this page contains its md5 prefix in its file name. +* Training curves and other statistics can be found in `metrics` for each model. + +#### Common Settings for COCO Models +* All COCO models were trained on `train2017` and evaluated on `val2017`. +* The default settings are __not directly comparable__ with Detectron's standard settings. + For example, our default training data augmentation uses scale jittering in addition to horizontal flipping. + + To make fair comparisons with Detectron's settings, see + [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison, + and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html) + for speed comparison. +* For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__: + * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction, + respectively. It obtains the best + speed/accuracy tradeoff, but the other two are still useful for research. + * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper. + * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads + for mask and box prediction, respectively. + This is used by the Deformable ConvNet paper. +* Most models are trained with the 3x schedule (~37 COCO epochs). 
+ Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs) + training schedule for comparison when doing quick research iteration. + +#### ImageNet Pretrained Models + +It's common to initialize from backbone models pre-trained on ImageNet classification tasks. The following backbone models are available: + +* [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model. +* [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model. +* [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB. +* [R-50.pkl (torchvision)](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/torchvision/R-50.pkl): converted copy of [torchvision's ResNet-50](https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.resnet50) model. + More details can be found in [the conversion script](tools/convert-torchvision-to-d2.py). + +Note that the above models have __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer. +Pretrained models in Detectron's format can still be used. For example: +* [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl): + ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k). +* [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): + ResNet-50 with Group Normalization. +* [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): + ResNet-101 with Group Normalization. + +These models require slightly different settings regarding normalization and architecture. See the model zoo configs for reference. + +#### License + +All models available for download through this document are licensed under the +[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/). + +### COCO Object Detection Baselines + +#### Faster R-CNN: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| R50-C4 | 1x | 0.551 | 0.102 | 4.8 | 35.7 | 137257644 | model \| metrics |
| R50-DC5 | 1x | 0.380 | 0.068 | 5.0 | 37.3 | 137847829 | model \| metrics |
| R50-FPN | 1x | 0.210 | 0.038 | 3.0 | 37.9 | 137257794 | model \| metrics |
| R50-C4 | 3x | 0.543 | 0.104 | 4.8 | 38.4 | 137849393 | model \| metrics |
| R50-DC5 | 3x | 0.378 | 0.070 | 5.0 | 39.0 | 137849425 | model \| metrics |
| R50-FPN | 3x | 0.209 | 0.038 | 3.0 | 40.2 | 137849458 | model \| metrics |
| R101-C4 | 3x | 0.619 | 0.139 | 5.9 | 41.1 | 138204752 | model \| metrics |
| R101-DC5 | 3x | 0.452 | 0.086 | 6.1 | 40.6 | 138204841 | model \| metrics |
| R101-FPN | 3x | 0.286 | 0.051 | 4.1 | 42.0 | 137851257 | model \| metrics |
| X101-FPN | 3x | 0.638 | 0.098 | 6.7 | 43.0 | 139173657 | model \| metrics |
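
As noted in the introduction, these baselines can be loaded directly from code through `detectron2.model_zoo`. A hedged sketch, using the R50-FPN 3x row above as the example (the config path follows the model zoo naming convention):

```python
from detectron2 import model_zoo

# Build the Faster R-CNN R50-FPN 3x baseline and load its released weights.
model = model_zoo.get("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml", trained=True)
model.eval()

# The config file and checkpoint URL can also be resolved individually.
cfg_file = model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
weights_url = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
```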
+ +#### RetinaNet: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| R50 | 1x | 0.205 | 0.041 | 4.1 | 37.4 | 190397773 | model \| metrics |
| R50 | 3x | 0.205 | 0.041 | 4.1 | 38.7 | 190397829 | model \| metrics |
| R101 | 3x | 0.291 | 0.054 | 5.2 | 40.4 | 190397697 | model \| metrics |
+ + +#### RPN & Fast R-CNN: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | prop. AR | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| RPN R50-C4 | 1x | 0.130 | 0.034 | 1.5 | | 51.6 | 137258005 | model \| metrics |
| RPN R50-FPN | 1x | 0.186 | 0.032 | 2.7 | | 58.0 | 137258492 | model \| metrics |
| Fast R-CNN R50-FPN | 1x | 0.140 | 0.029 | 2.6 | 37.8 | | 137635226 | model \| metrics |
+ +### COCO Instance Segmentation Baselines with Mask R-CNN + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-C4 | 1x | 0.584 | 0.110 | 5.2 | 36.8 | 32.2 | 137259246 | model \| metrics |
| R50-DC5 | 1x | 0.471 | 0.076 | 6.5 | 38.3 | 34.2 | 137260150 | model \| metrics |
| R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| R50-C4 | 3x | 0.575 | 0.111 | 5.2 | 39.8 | 34.4 | 137849525 | model \| metrics |
| R50-DC5 | 3x | 0.470 | 0.076 | 6.5 | 40.0 | 35.9 | 137849551 | model \| metrics |
| R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| R101-C4 | 3x | 0.652 | 0.145 | 6.3 | 42.6 | 36.7 | 138363239 | model \| metrics |
| R101-DC5 | 3x | 0.545 | 0.092 | 7.6 | 41.9 | 37.3 | 138363294 | model \| metrics |
| R101-FPN | 3x | 0.340 | 0.056 | 4.6 | 42.9 | 38.6 | 138205316 | model \| metrics |
| X101-FPN | 3x | 0.690 | 0.103 | 7.2 | 44.3 | 39.5 | 139653917 | model \| metrics |
+ +### COCO Person Keypoint Detection Baselines with Keypoint R-CNN + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | kp. AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.315 | 0.072 | 5.0 | 53.6 | 64.0 | 137261548 | model \| metrics |
| R50-FPN | 3x | 0.316 | 0.066 | 5.0 | 55.4 | 65.5 | 137849621 | model \| metrics |
| R101-FPN | 3x | 0.390 | 0.076 | 6.1 | 56.4 | 66.1 | 138363331 | model \| metrics |
| X101-FPN | 3x | 0.738 | 0.121 | 8.7 | 57.3 | 66.0 | 139686956 | model \| metrics |
+ +### COCO Panoptic Segmentation Baselines with Panoptic FPN + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.304 | 0.053 | 4.8 | 37.6 | 34.7 | 39.4 | 139514544 | model \| metrics |
| R50-FPN | 3x | 0.302 | 0.053 | 4.8 | 40.0 | 36.5 | 41.5 | 139514569 | model \| metrics |
| R101-FPN | 3x | 0.392 | 0.066 | 6.0 | 42.4 | 38.5 | 43.0 | 139514519 | model \| metrics |
+ + +### LVIS Instance Segmentation Baselines with Mask R-CNN + +Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5. +These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195). + +NOTE: the 1x schedule here has the same amount of __iterations__ as the COCO 1x baselines. +They are roughly 24 epochs of LVISv0.5 data. +The final results of these configs have large variance across different runs. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.292 | 0.107 | 7.1 | 23.6 | 24.4 | 144219072 | model \| metrics |
| R101-FPN | 1x | 0.371 | 0.114 | 7.8 | 25.6 | 25.9 | 144219035 | model \| metrics |
| X101-FPN | 1x | 0.712 | 0.151 | 10.2 | 26.7 | 27.1 | 144219108 | model \| metrics |
+ + + +### Cityscapes & Pascal VOC Baselines + +Simple baselines for +* Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only) +* Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | box AP50 | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN, Cityscapes | 0.240 | 0.078 | 4.4 | | | 36.5 | 142423278 | model \| metrics |
| R50-C4, VOC | 0.537 | 0.081 | 4.8 | 51.9 | 80.3 | | 142202221 | model \| metrics |
+ + + +### Other Settings + +Ablations for Deformable Conv and Cascade R-CNN: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Baseline R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| Deformable Conv | 1x | 0.342 | 0.048 | 3.5 | 41.5 | 37.5 | 138602867 | model \| metrics |
| Cascade R-CNN | 1x | 0.317 | 0.052 | 4.0 | 42.1 | 36.4 | 138602847 | model \| metrics |
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| Deformable Conv | 3x | 0.349 | 0.047 | 3.5 | 42.7 | 38.5 | 144998336 | model \| metrics |
| Cascade R-CNN | 3x | 0.328 | 0.053 | 4.0 | 44.3 | 38.5 | 144998488 | model \| metrics |
+ + +Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883). +(Note: The baseline uses `2fc` head while the others use [`4conv1fc` head](https://arxiv.org/abs/1803.08494)) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| GN | 3x | 0.309 | 0.060 | 5.6 | 42.6 | 38.6 | 138602888 | model \| metrics |
| SyncBN | 3x | 0.345 | 0.053 | 5.5 | 41.9 | 37.8 | 169527823 | model \| metrics |
| GN (from scratch) | 3x | 0.338 | 0.061 | 7.2 | 39.9 | 36.6 | 138602908 | model \| metrics |
| GN (from scratch) | 9x | N/A | 0.061 | 7.2 | 43.7 | 39.6 | 183808979 | model \| metrics |
| SyncBN (from scratch) | 9x | N/A | 0.055 | 7.2 | 43.6 | 39.3 | 184226666 | model \| metrics |
+ + +A few very large models trained for a long time, for demo purposes. They are trained using multiple machines: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| Panoptic FPN R101 | 0.098 | 11.4 | 47.4 | 41.3 | 46.1 | 139797668 | model \| metrics |
| Mask R-CNN X152 | 0.234 | 15.1 | 50.2 | 44.0 | | 18131413 | model \| metrics |
| above + test-time aug. | | | 51.9 | 45.9 | | | |
diff --git a/src/sts/README.md b/src/sts/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eec38870016f6006ab773d45cb874ccf57afe414 --- /dev/null +++ b/src/sts/README.md @@ -0,0 +1,159 @@ + +# SwinTextSpotter + + + +This is the pytorch implementation of Paper: SwinTextSpotter: Scene Text Spotting via Better Synergy between Text Detection and Text Recognition (CVPR 2022). The paper is available at [this link](https://arxiv.org/pdf/2203.10209.pdf). + +- We use the models pre-trained on ImageNet. The ImageNet pre-trained [SwinTransformer](https://drive.google.com/file/d/1wvzCMLJtEID8hBDu3wLpPv4xm3Es8ELC/view?usp=sharing) backbone is obtained from [SwinT_detectron2](https://github.com/xiaohu2015/SwinT_detectron2). + +## Models +[SWINTS-swin-english-pretrain [config]](https://github.com/mxin262/SwinTextSpotter/blob/main/projects/SWINTS/configs/SWINTS-swin-pretrain.yaml) \| [model_Google Drive](https://drive.google.com/file/d/1q3cNhJYPIZ8Sbk0-4i_gnQIF6z09rCKh/view?usp=sharing) \| [model_BaiduYun](https://pan.baidu.com/s/1INNghiHoI_K6m2t9YxVCIw) PW: 954t + +[SWINTS-swin-Total-Text [config]](https://github.com/mxin262/SwinTextSpotter/blob/main/projects/SWINTS/configs/SWINTS-swin-finetune-totaltext.yaml) \| [model_Google Drive](https://drive.google.com/file/d/1o6LbT0NayfIzTtJpozAqtz50wrSNnKIJ/view?usp=sharing) \| [model_BaiduYun](https://pan.baidu.com/s/1fLqMa9r-Ea2wIT6I81bwhA) PW: tf0i + +[SWINTS-swin-ctw [config]](https://github.com/mxin262/SwinTextSpotter/blob/main/projects/SWINTS/configs/SWINTS-swin-finetune-ctw.yaml) \| [model_Google Drive](https://drive.google.com/file/d/1LC7-JFuQIIYeUt_KaDH61ICGvVkRqkz7/view?usp=sharing) \| [model_BaiduYun](https://pan.baidu.com/s/1q7zZQ1Hnl6QPmwfJXal98Q) PW: 4etq + +[SWINTS-swin-icdar2015 [config]](https://github.com/mxin262/SwinTextSpotter/blob/main/projects/SWINTS/configs/SWINTS-swin-finetune-ic15.yaml) \| [model_Google Drive](https://drive.google.com/file/d/15lDht7RtN092DeGggN5qoEyTTUkmIuGs/view?usp=sharing) \| [model_BaiduYun](https://pan.baidu.com/s/1bWTwmIrZOUNqEUqx5cKXng) PW: 3n82 + +[SWINTS-swin-ReCTS [config]](https://github.com/mxin262/SwinTextSpotter/blob/main/projects/SWINTS/configs/SWINTS-swin-chn_finetune.yaml) \| [model_Google Drive](https://drive.google.com/file/d/1FLW35M18tw4fYSBL1qGzEOkTaD2t6mXT/view?usp=sharing) \| [model_BaiduYun](https://pan.baidu.com/s/1BHsLuwqUs_D_CO54UIaNPQ) PW: a4be + +[SWINTS-swin-vintext [config]](https://github.com/mxin262/SwinTextSpotter/blob/main/projects/SWINTS/configs/SWINTS-swin-finetune-vintext.yaml) \| [model_Google Drive](https://drive.google.com/file/d/1IfyPrYFnQOWoY8pPg-GIN5ofuALU15yD/view?usp=sharing) \| [model_BaiduYun](https://pan.baidu.com/s/1c5Xc9_lCun6mazhuxBk7sA) PW: slmp + +## Installation +- Python=3.8 +- PyTorch=1.8.0, torchvision=0.9.0, cudatoolkit=11.1 +- OpenCV for visualization + +## Steps +1. Install the repository (we recommend to use [Anaconda](https://www.anaconda.com/) for installation.) +``` +conda create -n SWINTS python=3.8 -y +conda activate SWINTS +conda install pytorch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge +pip install opencv-python +pip install scipy +pip install shapely +pip install rapidfuzz +pip install timm +pip install Polygon3 +git clone https://github.com/mxin262/SwinTextSpotter.git +cd SwinTextSpotter +python setup.py build develop +``` + +2. 
dataset path +``` +datasets +|_ totaltext +| |_ train_images +| |_ test_images +| |_ totaltext_train.json +| |_ weak_voc_new.txt +| |_ weak_voc_pair_list.txt +|_ mlt2017 +| |_ train_images +| |_ annotations/icdar_2017_mlt.json +....... +``` +Downloaded images +- ICDAR2017-MLT [[image]](https://rrc.cvc.uab.es/?ch=8&com=downloads) +- Syntext-150k: + - Part1: 94,723 [[dataset]](https://universityofadelaide.box.com/s/xyqgqx058jlxiymiorw8fsfmxzf1n03p) + - Part2: 54,327 [[dataset]](https://universityofadelaide.box.com/s/e0owoic8xacralf4j5slpgu50xfjoirs) +- ICDAR2015 [[image]](https://rrc.cvc.uab.es/?ch=4&com=downloads) +- ICDAR2013 [[image]](https://rrc.cvc.uab.es/?ch=2&com=downloads) +- Total-Text_train_images [[image]](https://drive.google.com/file/d/1idATPS2Uc0PAwTBcT2ndYNLse3yKtT6G/view?usp=sharing) +- Total-Text_test_images [[image]](https://drive.google.com/file/d/1P1mHAZN82HqR-YFui-wOTdp3zBY2N_lJ/view?usp=sharing) +- ReCTs [[images&label]](https://pan.baidu.com/s/1JC0_rNbsyz564YakptP6Ow) PW: 2b4q +- LSVT [[images&label]](https://pan.baidu.com/s/1j-zlH8SfmdTtH2OnuT9B7Q) PW: 9uh1 +- ArT [[images&label]](https://pan.baidu.com/s/165RtrJVIsJ3QqDjesoX1jQ) PW: 2865 +- SynChinese130k [[images]](https://drive.google.com/file/d/1w9BFDTfVgZvpLE003zM694E0we4OWmyP/view?usp=sharing)[[label]](https://drive.google.com/file/d/199sLThD_1e0vtDmpWrAEtUJyleS8DDTv/view?usp=sharing) +- Vintext_images [[image]](https://drive.google.com/file/d/1O8t84JtlQZE9ev4dgHrK3TLfbzRu2z9E/view?usp=sharing) + +Downloaded label[[Google Drive]](https://drive.google.com/file/d/1wd_Z8UPNXRtnzU_qZCukKhxa_CDO5eaO/view?usp=sharing) [[BaiduYun]]( https://pan.baidu.com/s/1bFTlChn92GdOvcF4TfjjIw) PW: wpaf + +Downloader lexicion[[Google Drive]](https://drive.google.com/file/d/1jNX0NQKtyMC1pnh_IV__0drgNwTnupca/view?usp=sharing) and place it to corresponding dataset. + +You can also prepare your custom dataset following the example scripts. +[[example scripts]](https://drive.google.com/file/d/1FE17GXyGPhDk5XI3EpbXwlOv1S8txOx2/view?usp=sharing) + +## Totaltext +To evaluate on Total Text, CTW1500, ICDAR2015, first download the zipped annotations with + +``` +cd datasets +mkdir evaluation +cd evaluation +wget -O gt_ctw1500.zip https://cloudstor.aarnet.edu.au/plus/s/xU3yeM3GnidiSTr/download +wget -O gt_totaltext.zip https://cloudstor.aarnet.edu.au/plus/s/SFHvin8BLUM4cNd/download +wget -O gt_icdar2015.zip https://drive.google.com/file/d/1wrq_-qIyb_8dhYVlDzLZTTajQzbic82Z/view?usp=sharing +wget -O gt_vintext.zip https://drive.google.com/file/d/11lNH0uKfWJ7Wc74PGshWCOgSxgEnUPEV/view?usp=sharing +``` + +3. Pretrain SWINTS (e.g., with Swin-Transformer backbone) + +``` +python projects/SWINTS/train_net.py \ + --num-gpus 8 \ + --config-file projects/SWINTS/configs/SWINTS-swin-pretrain.yaml +``` + +4. Fine-tune model on the mixed real dataset + +``` +python projects/SWINTS/train_net.py \ + --num-gpus 8 \ + --config-file projects/SWINTS/configs/SWINTS-swin-mixtrain.yaml +``` + +5. Fine-tune model + +``` +python projects/SWINTS/train_net.py \ + --num-gpus 8 \ + --config-file projects/SWINTS/configs/SWINTS-swin-finetune-totaltext.yaml +``` + +6. Evaluate SWINTS (e.g., with Swin-Transformer backbone) +``` +python projects/SWINTS/train_net.py \ + --config-file projects/SWINTS/configs/SWINTS-swin-finetune-totaltext.yaml \ + --eval-only MODEL.WEIGHTS ./output/model_final.pth +``` + +7. 
Visualize the detection and recognition results (e.g., with ResNet50 backbone) +``` +python demo/demo.py \ + --config-file projects/SWINTS/configs/SWINTS-swin-finetune-totaltext.yaml \ + --input input1.jpg \ + --output ./output \ + --confidence-threshold 0.4 \ + --opts MODEL.WEIGHTS ./output/model_final.pth +``` + +## Example results: + + + +## Acknowlegement +[Adelaidet](https://github.com/aim-uofa/AdelaiDet), [Detectron2](https://github.com/facebookresearch/detectron2), [ISTR](https://github.com/hujiecpp/ISTR), [SwinT_detectron2](https://github.com/xiaohu2015/SwinT_detectron2), [Focal-Transformer](https://github.com/microsoft/Focal-Transformer) and [MaskTextSpotterV3](https://github.com/MhLiao/MaskTextSpotterV3). + +## Citation + +If our paper helps your research, please cite it in your publications: + +```BibText +@article{huang2022swints, + title = {SwinTextSpotter: Scene Text Spotting via Better Synergy between Text Detection and Text Recognition}, + author = {Mingxin Huang and YuLiang liu and Zhenghao Peng and Chongyu Liu and Dahua Lin and Shenggao Zhu and Nicholas Yuan and Kai Ding and Lianwen Jin}, + journal={arXiv preprint arXiv:2203.10209}, + year = {2022} +} +``` + +# Copyright + +For commercial purpose usage, please contact Dr. Lianwen Jin: eelwjin@scut.edu.cn + +Copyright 2019, Deep Learning and Vision Computing Lab, South China China University of Technology. http://www.dlvc-lab.net diff --git a/src/sts/VNFREE.ttf b/src/sts/VNFREE.ttf new file mode 100644 index 0000000000000000000000000000000000000000..1556c7689624b12d252b99f10c63cec7eacaae5d Binary files /dev/null and b/src/sts/VNFREE.ttf differ diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/_C.cpython-38-x86_64-linux-gnu.so b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/_C.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..f91e4840eab0cd41f653dfa983933ea0c8fd12f1 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/_C.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8443dc289ba64e8e997f7ce7b88e54af11e54f8b9022c73b9cb76f78390cbd1 +size 21233064 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a951838f58f8bcf4b2b51a94b2ba31c53e8fe1af --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +from .utils.env import setup_environment + +setup_environment() + + +# This line will be programatically read/write by setup.py. +# Leave them at the bottom of this file and don't touch them. +__version__ = "0.4" diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..99da0469ae7e169d8970e4b642fed3f870076860 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +# File: + + +from . 
import catalog as _UNUSED # register the handler +from .detection_checkpoint import DetectionCheckpointer +from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer + +__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/c2_model_loading.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/c2_model_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8d181bd7200bd3fd38446e743f8f16780d6e76 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/c2_model_loading.py @@ -0,0 +1,407 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import re +from typing import Dict, List +import torch +from tabulate import tabulate + + +def convert_basic_c2_names(original_keys): + """ + Apply some basic name conversion to names in C2 weights. + It only deals with typical backbone models. + + Args: + original_keys (list[str]): + Returns: + list[str]: The same number of strings matching those in original_keys. + """ + layer_keys = copy.deepcopy(original_keys) + layer_keys = [ + {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys + ] # some hard-coded mappings + + layer_keys = [k.replace("_", ".") for k in layer_keys] + layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys] + layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys] + # Uniform both bn and gn names to "norm" + layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys] + + # stem + layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys] + # to avoid mis-matching with "conv1" in other components (e.g. 
detection head) + layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys] + + # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5) + # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys] + # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys] + # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys] + # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys] + + # blocks + layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys] + layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] + layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] + layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] + + # DensePose substitutions + layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys] + layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys] + layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys] + layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys] + layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys] + return layer_keys + + +def convert_c2_detectron_names(weights): + """ + Map Caffe2 Detectron weight names to Detectron2 names. + + Args: + weights (dict): name -> tensor + + Returns: + dict: detectron2 names -> tensor + dict: detectron2 names -> C2 names + """ + logger = logging.getLogger(__name__) + logger.info("Renaming Caffe2 weights ......") + original_keys = sorted(weights.keys()) + layer_keys = copy.deepcopy(original_keys) + + layer_keys = convert_basic_c2_names(layer_keys) + + # -------------------------------------------------------------------------- + # RPN hidden representation conv + # -------------------------------------------------------------------------- + # FPN case + # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then + # shared for all other levels, hence the appearance of "fpn2" + layer_keys = [ + k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys + ] + # Non-FPN case + layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # RPN box transformation conv + # -------------------------------------------------------------------------- + # FPN case (see note above about "fpn2") + layer_keys = [ + k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas") + for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + # Non-FPN case + layer_keys = [ + k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + + # -------------------------------------------------------------------------- + # Fast R-CNN box head + # -------------------------------------------------------------------------- + layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys] + layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys] + layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys] + layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys] + # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s + layer_keys = 
[re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # FPN lateral and output convolutions + # -------------------------------------------------------------------------- + def fpn_map(name): + """ + Look for keys with the following patterns: + 1) Starts with "fpn.inner." + Example: "fpn.inner.res2.2.sum.lateral.weight" + Meaning: These are lateral pathway convolutions + 2) Starts with "fpn.res" + Example: "fpn.res2.2.sum.weight" + Meaning: These are FPN output convolutions + """ + splits = name.split(".") + norm = ".norm" if "norm" in splits else "" + if name.startswith("fpn.inner."): + # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight'] + stage = int(splits[2][len("res") :]) + return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1]) + elif name.startswith("fpn.res"): + # splits example: ['fpn', 'res2', '2', 'sum', 'weight'] + stage = int(splits[1][len("res") :]) + return "fpn_output{}{}.{}".format(stage, norm, splits[-1]) + return name + + layer_keys = [fpn_map(k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # Mask R-CNN mask head + # -------------------------------------------------------------------------- + # roi_heads.StandardROIHeads case + layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys] + layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys] + layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys] + # roi_heads.Res5ROIHeads case + layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Keypoint R-CNN head + # -------------------------------------------------------------------------- + # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX" + layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys] + layer_keys = [ + k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys + ] + layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Done with replacements + # -------------------------------------------------------------------------- + assert len(set(layer_keys)) == len(layer_keys) + assert len(original_keys) == len(layer_keys) + + new_weights = {} + new_keys_to_original_keys = {} + for orig, renamed in zip(original_keys, layer_keys): + new_keys_to_original_keys[renamed] = orig + if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."): + # remove the meaningless prediction weight for background class + new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1 + new_weights[renamed] = weights[orig][new_start_idx:] + logger.info( + "Remove prediction weight for background class in {}. 
The shape changes from " + "{} to {}.".format( + renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape) + ) + ) + elif renamed.startswith("cls_score."): + # move weights of bg class from original index 0 to last index + logger.info( + "Move classification weights for background class in {} from index 0 to " + "index {}.".format(renamed, weights[orig].shape[0] - 1) + ) + new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]]) + else: + new_weights[renamed] = weights[orig] + + return new_weights, new_keys_to_original_keys + + +# Note the current matching is not symmetric. +# it assumes model_state_dict will have longer names. +def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True): + """ + Match names between the two state-dict, and returns a new chkpt_state_dict with names + converted to match model_state_dict with heuristics. The returned dict can be later + loaded with fvcore checkpointer. + If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2 + model and will be renamed at first. + + Strategy: suppose that the models that we will create will have prefixes appended + to each of its keys, for example due to an extra level of nesting that the original + pre-trained weights from ImageNet won't contain. For example, model.state_dict() + might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains + res2.conv1.weight. We thus want to match both parameters together. + For that, we look for each model weight, look among all loaded keys if there is one + that is a suffix of the current weight name, and use it if that's the case. + If multiple matches exist, take the one with longest size + of the corresponding name. For example, for the same model as before, the pretrained + weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, + we want to match backbone[0].body.conv1.weight to conv1.weight, and + backbone[0].body.res2.conv1.weight to res2.conv1.weight. + """ + model_keys = sorted(model_state_dict.keys()) + if c2_conversion: + ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict) + # original_keys: the name in the original dict (before renaming) + else: + original_keys = {x: x for x in ckpt_state_dict.keys()} + ckpt_keys = sorted(ckpt_state_dict.keys()) + + def match(a, b): + # Matched ckpt_key should be a complete (starts with '.') suffix. + # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1, + # but matches whatever_conv1 or mesh_head.whatever_conv1. + return a == b or a.endswith("." 
+ b) + + # get a matrix of string matches, where each (i, j) entry correspond to the size of the + # ckpt_key string, if it matches + match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys] + match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys)) + # use the matched one with longest size in case of multiple matches + max_match_size, idxs = match_matrix.max(1) + # remove indices that correspond to no-match + idxs[max_match_size == 0] = -1 + + logger = logging.getLogger(__name__) + # matched_pairs (matched checkpoint key --> matched model key) + matched_keys = {} + result_state_dict = {} + for idx_model, idx_ckpt in enumerate(idxs.tolist()): + if idx_ckpt == -1: + continue + key_model = model_keys[idx_model] + key_ckpt = ckpt_keys[idx_ckpt] + value_ckpt = ckpt_state_dict[key_ckpt] + shape_in_model = model_state_dict[key_model].shape + + if shape_in_model != value_ckpt.shape: + logger.warning( + "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( + key_ckpt, value_ckpt.shape, key_model, shape_in_model + ) + ) + logger.warning( + "{} will not be loaded. Please double check and see if this is desired.".format( + key_ckpt + ) + ) + continue + + assert key_model not in result_state_dict + result_state_dict[key_model] = value_ckpt + if key_ckpt in matched_keys: # already added to matched_keys + logger.error( + "Ambiguity found for {} in checkpoint!" + "It matches at least two keys in the model ({} and {}).".format( + key_ckpt, key_model, matched_keys[key_ckpt] + ) + ) + raise ValueError("Cannot match one checkpoint key to multiple keys in the model.") + + matched_keys[key_ckpt] = key_model + + # logging: + matched_model_keys = sorted(matched_keys.values()) + if len(matched_model_keys) == 0: + logger.warning("No weights in checkpoint matched with model.") + return ckpt_state_dict + common_prefix = _longest_common_prefix(matched_model_keys) + rev_matched_keys = {v: k for k, v in matched_keys.items()} + original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys} + + model_key_groups = _group_keys_by_module(matched_model_keys, original_keys) + table = [] + memo = set() + for key_model in matched_model_keys: + if key_model in memo: + continue + if key_model in model_key_groups: + group = model_key_groups[key_model] + memo |= set(group) + shapes = [tuple(model_state_dict[k].shape) for k in group] + table.append( + ( + _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*", + _group_str([original_keys[k] for k in group]), + " ".join([str(x).replace(" ", "") for x in shapes]), + ) + ) + else: + key_checkpoint = original_keys[key_model] + shape = str(tuple(model_state_dict[key_model].shape)) + table.append((key_model[len(common_prefix) :], key_checkpoint, shape)) + table_str = tabulate( + table, tablefmt="pipe", headers=["Names in Model", "Names in Checkpoint", "Shapes"] + ) + logger.info( + "Following weights matched with " + + (f"submodule {common_prefix[:-1]}" if common_prefix else "model") + + ":\n" + + table_str + ) + + unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())] + for k in unmatched_ckpt_keys: + result_state_dict[k] = ckpt_state_dict[k] + return result_state_dict + + +def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]): + """ + Params in the same submodule are grouped together. 
+ + Args: + keys: names of all parameters + original_names: mapping from parameter name to their name in the checkpoint + + Returns: + dict[name -> all other names in the same group] + """ + + def _submodule_name(key): + pos = key.rfind(".") + if pos < 0: + return None + prefix = key[: pos + 1] + return prefix + + all_submodules = [_submodule_name(k) for k in keys] + all_submodules = [x for x in all_submodules if x] + all_submodules = sorted(all_submodules, key=len) + + ret = {} + for prefix in all_submodules: + group = [k for k in keys if k.startswith(prefix)] + if len(group) <= 1: + continue + original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group]) + if len(original_name_lcp) == 0: + # don't group weights if original names don't share prefix + continue + + for k in group: + if k in ret: + continue + ret[k] = group + return ret + + +def _longest_common_prefix(names: List[str]) -> str: + """ + ["abc.zfg", "abc.zef"] -> "abc." + """ + names = [n.split(".") for n in names] + m1, m2 = min(names), max(names) + ret = [a for a, b in zip(m1, m2) if a == b] + ret = ".".join(ret) + "." if len(ret) else "" + return ret + + +def _longest_common_prefix_str(names: List[str]) -> str: + m1, m2 = min(names), max(names) + lcp = [a for a, b in zip(m1, m2) if a == b] + lcp = "".join(lcp) + return lcp + + +def _group_str(names: List[str]) -> str: + """ + Turn "common1", "common2", "common3" into "common{1,2,3}" + """ + lcp = _longest_common_prefix_str(names) + rest = [x[len(lcp) :] for x in names] + rest = "{" + ",".join(rest) + "}" + ret = lcp + rest + + # add some simplification for BN specifically + ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*") + ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*") + return ret diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/catalog.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..9a85736754a0de4550df96c22f38fc515bd02d71 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/catalog.py @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging + +from detectron2.utils.file_io import PathHandler, PathManager + + +class ModelCatalog(object): + """ + Store mappings from names to third-party models. + """ + + S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" + + # MSRA models have STRIDE_IN_1X1=True. False otherwise. + # NOTE: all BN models here have fused BN into an affine layer. + # As a result, you should only load them to a model with "FrozenBN". + # Loading them to a model with regular BN or SyncBN is wrong. + # Even when loaded to FrozenBN, it is still different from affine by an epsilon, + # which should be negligible for training. + # NOTE: all models here uses PIXEL_STD=[1,1,1] + # NOTE: Most of the BN models here are no longer used. We use the + # re-converted pre-trained models under detectron2 model zoo instead. 
+ C2_IMAGENET_MODELS = { + "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", + "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", + "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", + "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", + "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", + "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", + "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", + } + + C2_DETECTRON_PATH_FORMAT = ( + "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 + ) + + C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" + C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" + + # format: {model_name} -> part of the url + C2_DETECTRON_MODELS = { + "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 + "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 + "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 + "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 + "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 + "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 + "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 + "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 + "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 + "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 + "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 + "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 + "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 + } + + @staticmethod + def get(name): + if name.startswith("Caffe2Detectron/COCO"): + return ModelCatalog._get_c2_detectron_baseline(name) + if name.startswith("ImageNetPretrained/"): + return ModelCatalog._get_c2_imagenet_pretrained(name) + raise RuntimeError("model not present in the catalog: {}".format(name)) + + @staticmethod + def _get_c2_imagenet_pretrained(name): + prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX + name = name[len("ImageNetPretrained/") :] + name = ModelCatalog.C2_IMAGENET_MODELS[name] + url = "/".join([prefix, name]) + return url + + @staticmethod + def _get_c2_detectron_baseline(name): + name = name[len("Caffe2Detectron/COCO/") :] + url = ModelCatalog.C2_DETECTRON_MODELS[name] + if "keypoint_rcnn" in name: + dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS + else: + dataset = ModelCatalog.C2_DATASET_COCO + + if "35998355/rpn_R-50-C4_1x" in name: + # this one model is somehow different from others .. 
+ type = "rpn" + else: + type = "generalized_rcnn" + + # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. + url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( + prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset + ) + return url + + +class ModelCatalogHandler(PathHandler): + """ + Resolve URL like catalog://. + """ + + PREFIX = "catalog://" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path, **kwargs): + logger = logging.getLogger(__name__) + catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) + logger.info("Catalog entry {} points to {}".format(path, catalog_path)) + return PathManager.get_local_path(catalog_path, **kwargs) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +PathManager.register_handler(ModelCatalogHandler()) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/detection_checkpoint.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/detection_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..da979ca72ae76eaee9a4478c01bffc8f58474a18 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/checkpoint/detection_checkpoint.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import pickle +from fvcore.common.checkpoint import Checkpointer + +import detectron2.utils.comm as comm +from detectron2.utils.file_io import PathManager + +from .c2_model_loading import align_and_update_state_dicts + + +class DetectionCheckpointer(Checkpointer): + """ + Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 + model zoo, and apply conversions for legacy models. + """ + + def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): + is_main_process = comm.is_main_process() + super().__init__( + model, + save_dir, + save_to_disk=is_main_process if save_to_disk is None else save_to_disk, + **checkpointables, + ) + self.path_manager = PathManager + + def _load_file(self, filename): + if filename.endswith(".pkl"): + with PathManager.open(filename, "rb") as f: + data = pickle.load(f, encoding="latin1") + if "model" in data and "__author__" in data: + # file is in Detectron2 model zoo format + self.logger.info("Reading a file from '{}'".format(data["__author__"])) + return data + else: + # assume file is from Caffe2 / Detectron1 model zoo + if "blobs" in data: + # Detection models have "blobs", but ImageNet models don't + data = data["blobs"] + data = {k: v for k, v in data.items() if not k.endswith("_momentum")} + return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} + + loaded = super()._load_file(filename) # load native pth checkpoint + if "model" not in loaded: + loaded = {"model": loaded} + return loaded + + def _load_model(self, checkpoint): + if checkpoint.get("matching_heuristics", False): + self._convert_ndarray_to_tensor(checkpoint["model"]) + # convert weights by name-matching heuristics + checkpoint["model"] = align_and_update_state_dicts( + self.model.state_dict(), + checkpoint["model"], + c2_conversion=checkpoint.get("__author__", None) == "Caffe2", + ) + # for non-caffe2 models, use standard ways to load it + incompatible = super()._load_model(checkpoint) + + model_buffers = dict(self.model.named_buffers(recurse=False)) + for k in ["pixel_mean", "pixel_std"]: + # Ignore missing key message about pixel_mean/std. 
+ # Though they may be missing in old checkpoints, they will be correctly + # initialized from config anyway. + if k in model_buffers: + try: + incompatible.missing_keys.remove(k) + except ValueError: + pass + return incompatible diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b3669f7ebe8fffc3539a10932ecccc128a8cc4b6 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .compat import downgrade_config, upgrade_config +from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable + +__all__ = [ + "CfgNode", + "get_cfg", + "global_cfg", + "set_global_cfg", + "downgrade_config", + "upgrade_config", + "configurable", +] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/compat.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..11a08c439bf14defd880e37a938fab8a08e68eeb --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/compat.py @@ -0,0 +1,229 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Backward compatibility of configs. + +Instructions to bump version: ++ It's not needed to bump version if new keys are added. + It's only needed when backward-incompatible changes happen + (i.e., some existing keys disappear, or the meaning of a key changes) ++ To bump version, do the following: + 1. Increment _C.VERSION in defaults.py + 2. Add a converter in this file. + + Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X, + and a function "downgrade" which in-place downgrades config from X to X-1 + + In each function, VERSION is left unchanged. + + Each converter assumes that its input has the relevant keys + (i.e., the input is not a partial config). + 3. Run the tests (test_config.py) to make sure the upgrade & downgrade + functions are consistent. +""" + +import logging +from typing import List, Optional, Tuple + +from .config import CfgNode as CN +from .defaults import _C + +__all__ = ["upgrade_config", "downgrade_config"] + + +def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN: + """ + Upgrade a config from its current version to a newer version. + + Args: + cfg (CfgNode): + to_version (int): defaults to the latest version. + """ + cfg = cfg.clone() + if to_version is None: + to_version = _C.VERSION + + assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version): + converter = globals()["ConverterV" + str(k + 1)] + converter.upgrade(cfg) + cfg.VERSION = k + 1 + return cfg + + +def downgrade_config(cfg: CN, to_version: int) -> CN: + """ + Downgrade a config from its current version to an older version. + + Args: + cfg (CfgNode): + to_version (int): + + Note: + A general downgrade of arbitrary configs is not always possible due to the + different functionalities in different versions. + The purpose of downgrade is only to recover the defaults in old versions, + allowing it to load an old partial yaml config. 
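For reference, the `catalog://` scheme registered at the end of `catalog.py` resolves names through `ModelCatalog.get()` before anything is downloaded, and `DetectionCheckpointer` then decides how to parse the file. A small sketch, assuming this vendored build is importable (the module path follows the file layout above; the checkpointer call in the comment is the usual calling pattern, not code from this repo):

```python
# Illustration only.
from detectron2.checkpoint.catalog import ModelCatalog

print(ModelCatalog.get("ImageNetPretrained/MSRA/R-50"))
# -> https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl

# Typical loading pattern with the checkpointer defined in detection_checkpoint.py:
#   from detectron2.checkpoint import DetectionCheckpointer
#   DetectionCheckpointer(model).load("catalog://ImageNetPretrained/MSRA/R-50")
# A ".pkl" file takes the Caffe2/Detectron branch of _load_file(); ".pth" files fall
# through to the parent fvcore Checkpointer.
```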
+ Therefore, the implementation only needs to fill in the default values + in the old version when a general downgrade is not possible. + """ + cfg = cfg.clone() + assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version, -1): + converter = globals()["ConverterV" + str(k)] + converter.downgrade(cfg) + cfg.VERSION = k - 1 + return cfg + + +def guess_version(cfg: CN, filename: str) -> int: + """ + Guess the version of a partial config where the VERSION field is not specified. + Returns the version, or the latest if cannot make a guess. + + This makes it easier for users to migrate. + """ + logger = logging.getLogger(__name__) + + def _has(name: str) -> bool: + cur = cfg + for n in name.split("."): + if n not in cur: + return False + cur = cur[n] + return True + + # Most users' partial configs have "MODEL.WEIGHT", so guess on it + ret = None + if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"): + ret = 1 + + if ret is not None: + logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret)) + else: + ret = _C.VERSION + logger.warning( + "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format( + filename, ret + ) + ) + return ret + + +def _rename(cfg: CN, old: str, new: str) -> None: + old_keys = old.split(".") + new_keys = new.split(".") + + def _set(key_seq: List[str], val: str) -> None: + cur = cfg + for k in key_seq[:-1]: + if k not in cur: + cur[k] = CN() + cur = cur[k] + cur[key_seq[-1]] = val + + def _get(key_seq: List[str]) -> CN: + cur = cfg + for k in key_seq: + cur = cur[k] + return cur + + def _del(key_seq: List[str]) -> None: + cur = cfg + for k in key_seq[:-1]: + cur = cur[k] + del cur[key_seq[-1]] + if len(cur) == 0 and len(key_seq) > 1: + _del(key_seq[:-1]) + + _set(new_keys, _get(old_keys)) + _del(old_keys) + + +class _RenameConverter: + """ + A converter that handles simple rename. + """ + + RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name) + + @classmethod + def upgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME: + _rename(cfg, old, new) + + @classmethod + def downgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME[::-1]: + _rename(cfg, new, old) + + +class ConverterV1(_RenameConverter): + RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")] + + +class ConverterV2(_RenameConverter): + """ + A large bulk of rename, before public release. 
+ """ + + RENAME = [ + ("MODEL.WEIGHT", "MODEL.WEIGHTS"), + ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"), + ( + "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT", + "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH", + ), + ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"), + ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"), + ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"), + ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"), + ("TEST.AUG_ON", "TEST.AUG.ENABLED"), + ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"), + ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"), + ("TEST.AUG_FLIP", "TEST.AUG.FLIP"), + ] + + @classmethod + def upgrade(cls, cfg: CN) -> None: + super().upgrade(cfg) + + if cfg.MODEL.META_ARCHITECTURE == "RetinaNet": + _rename( + cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS" + ) + _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"] + else: + _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS") + _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"] + + @classmethod + def downgrade(cls, cfg: CN) -> None: + super().downgrade(cfg) + + _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS") + _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES") + cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS + cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES + cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/config.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..5c574b4805c7e0e0a0d0aeb9ca49ca51a2f18c44 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/config.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import functools +import inspect +import logging +from fvcore.common.config import CfgNode as _CfgNode + +from detectron2.utils.file_io import PathManager + + +class CfgNode(_CfgNode): + """ + The same as `fvcore.common.config.CfgNode`, but different in: + + 1. Use unsafe yaml loading by default. + Note that this may lead to arbitrary code execution: you must not + load a config file from untrusted sources before manually inspecting + the content of the file. + 2. Support config versioning. 
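The converter machinery in `compat.py` is easiest to see on a round trip between versions. A hedged sketch, assuming the vendored `detectron2.config` package is importable (`get_cfg` and the default keys it returns are defined further down in this diff, in `config.py` and `defaults.py`):

```python
# Illustration only: round-trip a default config through one version downgrade/upgrade.
from detectron2.config import get_cfg, upgrade_config, downgrade_config

cfg = get_cfg()                            # defaults at the latest VERSION
old = downgrade_config(cfg, to_version=1)  # ConverterV2.downgrade: MODEL.WEIGHTS -> MODEL.WEIGHT, ...
assert "WEIGHT" in old.MODEL and "WEIGHTS" not in old.MODEL
new = upgrade_config(old)                  # re-applies the renames up to the latest version
assert new.MODEL.WEIGHTS == cfg.MODEL.WEIGHTS
```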
+ When attempting to merge an old config, it will convert the old config automatically. + """ + + @classmethod + def _open_cfg(cls, filename): + return PathManager.open(filename, "r") + + # Note that the default value of allow_unsafe is changed to True + def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: + assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!" + loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) + loaded_cfg = type(self)(loaded_cfg) + + # defaults.py needs to import CfgNode + from .defaults import _C + + latest_ver = _C.VERSION + assert ( + latest_ver == self.VERSION + ), "CfgNode.merge_from_file is only allowed on a config object of latest version!" + + logger = logging.getLogger(__name__) + + loaded_ver = loaded_cfg.get("VERSION", None) + if loaded_ver is None: + from .compat import guess_version + + loaded_ver = guess_version(loaded_cfg, cfg_filename) + assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( + loaded_ver, self.VERSION + ) + + if loaded_ver == self.VERSION: + self.merge_from_other_cfg(loaded_cfg) + else: + # compat.py needs to import CfgNode + from .compat import upgrade_config, downgrade_config + + logger.warning( + "Loading an old v{} config file '{}' by automatically upgrading to v{}. " + "See docs/CHANGELOG.md for instructions to update your files.".format( + loaded_ver, cfg_filename, self.VERSION + ) + ) + # To convert, first obtain a full config at an old version + old_self = downgrade_config(self, to_version=loaded_ver) + old_self.merge_from_other_cfg(loaded_cfg) + new_config = upgrade_config(old_self) + self.clear() + self.update(new_config) + + def dump(self, *args, **kwargs): + """ + Returns: + str: a yaml string representation of the config + """ + # to make it show up in docs + return super().dump(*args, **kwargs) + + +global_cfg = CfgNode() + + +def get_cfg() -> CfgNode: + """ + Get a copy of the default config. + + Returns: + a detectron2 CfgNode instance. + """ + from .defaults import _C + + return _C.clone() + + +def set_global_cfg(cfg: CfgNode) -> None: + """ + Let the global config point to the given cfg. + + Assume that the given "cfg" has the key "KEY", after calling + `set_global_cfg(cfg)`, the key can be accessed by: + :: + from detectron2.config import global_cfg + print(global_cfg.KEY) + + By using a hacky global config, you can access these configs anywhere, + without having to pass the config object or the values deep into the code. + This is a hacky feature introduced for quick prototyping / research exploration. + """ + global global_cfg + global_cfg.clear() + global_cfg.update(cfg) + + +def configurable(init_func=None, *, from_config=None): + """ + Decorate a function or a class's __init__ method so that it can be called + with a :class:`CfgNode` object using a :func:`from_config` function that translates + :class:`CfgNode` to arguments. + + Examples: + :: + # Usage 1: Decorator on __init__: + class A: + @configurable + def __init__(self, a, b=2, c=3): + pass + + @classmethod + def from_config(cls, cfg): # 'cfg' must be the first argument + # Returns kwargs to be passed to __init__ + return {"a": cfg.A, "b": cfg.B} + + a1 = A(a=1, b=2) # regular construction + a2 = A(cfg) # construct with a cfg + a3 = A(cfg, b=3, c=4) # construct with extra overwrite + + # Usage 2: Decorator on any function. 
Needs an extra from_config argument: + @configurable(from_config=lambda cfg: {"a: cfg.A, "b": cfg.B}) + def a_func(a, b=2, c=3): + pass + + a1 = a_func(a=1, b=2) # regular call + a2 = a_func(cfg) # call with a cfg + a3 = a_func(cfg, b=3, c=4) # call with extra overwrite + + Args: + init_func (callable): a class's ``__init__`` method in usage 1. The + class must have a ``from_config`` classmethod which takes `cfg` as + the first argument. + from_config (callable): the from_config function in usage 2. It must take `cfg` + as its first argument. + """ + + if init_func is not None: + assert ( + inspect.isfunction(init_func) + and from_config is None + and init_func.__name__ == "__init__" + ), "Incorrect use of @configurable. Check API documentation for examples." + + @functools.wraps(init_func) + def wrapped(self, *args, **kwargs): + try: + from_config_func = type(self).from_config + except AttributeError as e: + raise AttributeError( + "Class with @configurable must have a 'from_config' classmethod." + ) from e + if not inspect.ismethod(from_config_func): + raise TypeError("Class with @configurable must have a 'from_config' classmethod.") + + if _called_with_cfg(*args, **kwargs): + explicit_args = _get_args_from_config(from_config_func, *args, **kwargs) + init_func(self, **explicit_args) + else: + init_func(self, *args, **kwargs) + + return wrapped + + else: + if from_config is None: + return configurable # @configurable() is made equivalent to @configurable + assert inspect.isfunction( + from_config + ), "from_config argument of configurable must be a function!" + + def wrapper(orig_func): + @functools.wraps(orig_func) + def wrapped(*args, **kwargs): + if _called_with_cfg(*args, **kwargs): + explicit_args = _get_args_from_config(from_config, *args, **kwargs) + return orig_func(**explicit_args) + else: + return orig_func(*args, **kwargs) + + return wrapped + + return wrapper + + +def _get_args_from_config(from_config_func, *args, **kwargs): + """ + Use `from_config` to obtain explicit arguments. + + Returns: + dict: arguments to be used for cls.__init__ + """ + signature = inspect.signature(from_config_func) + if list(signature.parameters.keys())[0] != "cfg": + if inspect.isfunction(from_config_func): + name = from_config_func.__name__ + else: + name = f"{from_config_func.__self__}.from_config" + raise TypeError(f"{name} must take 'cfg' as the first argument!") + support_var_arg = any( + param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] + for param in signature.parameters.values() + ) + if support_var_arg: # forward all arguments to from_config, if from_config accepts them + ret = from_config_func(*args, **kwargs) + else: + # forward supported arguments to from_config + supported_arg_names = set(signature.parameters.keys()) + extra_kwargs = {} + for name in list(kwargs.keys()): + if name not in supported_arg_names: + extra_kwargs[name] = kwargs.pop(name) + ret = from_config_func(*args, **kwargs) + # forward the other arguments to __init__ + ret.update(extra_kwargs) + return ret + + +def _called_with_cfg(*args, **kwargs): + """ + Returns: + bool: whether the arguments contain CfgNode and should be considered + forwarded to from_config. + """ + from omegaconf import DictConfig + + if len(args) and isinstance(args[0], (_CfgNode, DictConfig)): + return True + if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)): + return True + # `from_config`'s first argument is forced to be "cfg". + # So the above check covers all cases. 
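A concrete, self-contained sketch of how the `configurable` decorator behaves, assuming the vendored `detectron2.config` is importable; the `Head` class here is illustrative, and the defaults it reads (80 classes, 0.05 score threshold) come from `defaults.py` later in this diff:

```python
# Illustration only.
from detectron2.config import configurable, get_cfg

class Head:
    @configurable
    def __init__(self, num_classes, score_thresh=0.05):
        self.num_classes = num_classes
        self.score_thresh = score_thresh

    @classmethod
    def from_config(cls, cfg):  # 'cfg' must be the first argument
        return {
            "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
            "score_thresh": cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST,
        }

cfg = get_cfg()
h1 = Head(num_classes=3)          # regular construction, decorator stays out of the way
h2 = Head(cfg)                    # built from the config: 80 classes, 0.05 threshold
h3 = Head(cfg, score_thresh=0.5)  # config values plus an explicit override
```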
+ return False diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/defaults.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..a334c14dc765e20b4d95cea8e1ff990193c80af6 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/defaults.py @@ -0,0 +1,628 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .config import CfgNode as CN + +# ----------------------------------------------------------------------------- +# Convention about Training / Test specific parameters +# ----------------------------------------------------------------------------- +# Whenever an argument can be either used for training or for testing, the +# corresponding name will be post-fixed by a _TRAIN for a training parameter, +# or _TEST for a test-specific parameter. +# For example, the number of images during training will be +# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be +# IMAGES_PER_BATCH_TEST + +# ----------------------------------------------------------------------------- +# Config definition +# ----------------------------------------------------------------------------- + +_C = CN() + +# The version number, to upgrade from old configs to new ones if any +# changes happen. It's recommended to keep a VERSION in your config file. +_C.VERSION = 2 + +_C.MODEL = CN() +_C.MODEL.LOAD_PROPOSALS = False +_C.MODEL.MASK_ON = False +_C.MODEL.KEYPOINT_ON = False +_C.MODEL.DEVICE = "cuda" +_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" + +# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file +# to be loaded to the model. You can find available models in the model zoo. +_C.MODEL.WEIGHTS = "" + +# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). +# To train on images of different number of channels, just set different mean & std. +# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] +_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675] +# When using pre-trained models in Detectron1 or any MSRA models, +# std has been absorbed into its conv1 weights, so the std needs to be set 1. +# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) +_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0] + + +# ----------------------------------------------------------------------------- +# INPUT +# ----------------------------------------------------------------------------- +_C.INPUT = CN() +# Size of the smallest side of the image during training +_C.INPUT.MIN_SIZE_TRAIN = (800,) +# Sample size of smallest side by choice or random selection from range give by +# INPUT.MIN_SIZE_TRAIN +_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" +# Maximum size of the side of the image during training +_C.INPUT.MAX_SIZE_TRAIN = 1333 +# Size of the smallest side of the image during testing. Set to zero to disable resize in testing. +_C.INPUT.MIN_SIZE_TEST = 800 +# Maximum size of the side of the image during testing +_C.INPUT.MAX_SIZE_TEST = 1333 +# Mode for flipping images used in data augmentation during training +# choose one of ["horizontal, "vertical", "none"] +_C.INPUT.RANDOM_FLIP = "horizontal" + +# `True` if cropping is used for data augmentation during training +_C.INPUT.CROP = CN({"ENABLED": False}) +# Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation. 
+_C.INPUT.CROP.TYPE = "relative_range" +# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of +# pixels if CROP.TYPE is "absolute" +_C.INPUT.CROP.SIZE = [0.9, 0.9] +_C.INPUT.CROP.CROP_INSTANCE = False + +# Whether the model needs RGB, YUV, HSV etc. +# Should be one of the modes defined here, as we use PIL to read the image: +# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes +# with BGR being the one exception. One can set image format to BGR, we will +# internally use RGB for conversion and flip the channels over +_C.INPUT.FORMAT = "BGR" +# The ground truth mask format that the model will use. +# Mask R-CNN supports either "polygon" or "bitmask" as ground truth. +_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask" + + +# ----------------------------------------------------------------------------- +# Dataset +# ----------------------------------------------------------------------------- +_C.DATASETS = CN() +# List of the dataset names for training. Must be registered in DatasetCatalog +# Samples from these datasets will be merged and used as one dataset. +_C.DATASETS.TRAIN = () +# List of the pre-computed proposal files for training, which must be consistent +# with datasets listed in DATASETS.TRAIN. +_C.DATASETS.PROPOSAL_FILES_TRAIN = () +# Number of top scoring precomputed proposals to keep for training +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000 +# List of the dataset names for testing. Must be registered in DatasetCatalog +_C.DATASETS.TEST = () +# List of the pre-computed proposal files for test, which must be consistent +# with datasets listed in DATASETS.TEST. +_C.DATASETS.PROPOSAL_FILES_TEST = () +# Number of top scoring precomputed proposals to keep for test +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000 + +# ----------------------------------------------------------------------------- +# DataLoader +# ----------------------------------------------------------------------------- +_C.DATALOADER = CN() +# Number of data loading threads +_C.DATALOADER.NUM_WORKERS = 4 +# If True, each batch should contain only images for which the aspect ratio +# is compatible. This groups portrait images together, and landscape images +# are not batched with portrait images. +_C.DATALOADER.ASPECT_RATIO_GROUPING = True +# Options: TrainingSampler, RepeatFactorTrainingSampler +_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler" +# Repeat threshold for RepeatFactorTrainingSampler +_C.DATALOADER.REPEAT_THRESHOLD = 0.0 +# Tf True, when working on datasets that have instance annotations, the +# training dataloader will filter out images without associated annotations +_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True + +# ---------------------------------------------------------------------------- # +# Backbone options +# ---------------------------------------------------------------------------- # +_C.MODEL.BACKBONE = CN() + +_C.MODEL.BACKBONE.NAME = "build_resnet_backbone" +# Freeze the first several stages so they are not trained. +# There are 5 stages in ResNet. The first is a convolution, and the following +# stages are each group of residual blocks. 
+_C.MODEL.BACKBONE.FREEZE_AT = 2 + + +# ---------------------------------------------------------------------------- # +# FPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.FPN = CN() +# Names of the input feature maps to be used by FPN +# They must have contiguous power of 2 strides +# e.g., ["res2", "res3", "res4", "res5"] +_C.MODEL.FPN.IN_FEATURES = [] +_C.MODEL.FPN.OUT_CHANNELS = 256 + +# Options: "" (no norm), "GN" +_C.MODEL.FPN.NORM = "" + +# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" +_C.MODEL.FPN.FUSE_TYPE = "sum" + + +# ---------------------------------------------------------------------------- # +# Proposal generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.PROPOSAL_GENERATOR = CN() +# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals" +_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" +# Proposal height and width both need to be greater than MIN_SIZE +# (a the scale used during training or inference) +_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0 + + +# ---------------------------------------------------------------------------- # +# Anchor generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.ANCHOR_GENERATOR = CN() +# The generator can be any name in the ANCHOR_GENERATOR registry +_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" +# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input. +# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for +# IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1. +# When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]] +# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect +# ratios are generated by an anchor generator. +# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W) +# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true, +# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used +# for all IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]] +# Anchor angles. +# list[list[float]], the angle in degrees, for each input feature map. +# ANGLES[i] specifies the list of angles for IN_FEATURES[i]. +_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]] +# Relative offset between the center of the first anchor and the top-left corner of the image +# Value has to be in [0, 1). Recommend to use 0.5, which means half stride. +# The value is not expected to affect model accuracy. +_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0 + +# ---------------------------------------------------------------------------- # +# RPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.RPN = CN() +_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY + +# Names of the input feature maps to be used by RPN +# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN +_C.MODEL.RPN.IN_FEATURES = ["res4"] +# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels +# Set to -1 or a large value, e.g. 
100000, to disable pruning anchors +_C.MODEL.RPN.BOUNDARY_THRESH = -1 +# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD] +# Minimum overlap required between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD +# ==> positive RPN example: 1) +# Maximum overlap allowed between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD +# ==> negative RPN example: 0) +# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD) +# are ignored (-1) +_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7] +_C.MODEL.RPN.IOU_LABELS = [0, -1, 1] +# Number of regions per image used to train RPN +_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 +# Target fraction of foreground (positive) examples per RPN minibatch +_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 +# Options are: "smooth_l1", "giou" +_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1" +_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0 +# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets +_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0 +_C.MODEL.RPN.LOSS_WEIGHT = 1.0 +# Number of top scoring RPN proposals to keep before applying NMS +# When FPN is used, this is *per FPN level* (not total) +_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000 +_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 +# Number of top scoring RPN proposals to keep after applying NMS +# When FPN is used, this limit is applied per level and then again to the union +# of proposals from all levels +# NOTE: When FPN is used, the meaning of this config is different from Detectron1. +# It means per-batch topk in Detectron1, but per-image topk here. +# See the "find_top_rpn_proposals" function for details. +_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000 +_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000 +# NMS threshold used on RPN proposals +_C.MODEL.RPN.NMS_THRESH = 0.7 +# Set this to -1 to use the same number of output channels as input channels. +_C.MODEL.RPN.CONV_DIMS = [-1] + +# ---------------------------------------------------------------------------- # +# ROI HEADS options +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_HEADS = CN() +_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads" +# Number of foreground classes +_C.MODEL.ROI_HEADS.NUM_CLASSES = 80 +# Names of the input feature maps to be used by ROI heads +# Currently all heads (box, mask, ...) use the same input feature map list +# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN +_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"] +# IOU overlap ratios [IOU_THRESHOLD] +# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD) +# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD) +_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] +_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1] +# RoI minibatch size *per image* (number of regions of interest [ROIs]) +# Total number of RoIs per training minibatch = +# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH +# E.g., a common configuration is: 512 * 16 = 8192 +_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 +# Target fraction of RoI minibatch that is labeled foreground (i.e. 
class > 0) +_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 + +# Only used on test mode + +# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to +# balance obtaining high recall with not having too many low precision +# detections that will slow down inference post processing steps (like NMS) +# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down +# inference. +_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 +# If True, augment proposals with ground-truth boxes before sampling proposals to +# train ROI heads. +_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True + +# ---------------------------------------------------------------------------- # +# Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_HEAD = CN() +# C4 don't use head name option +# Options for non-C4 models: FastRCNNConvFCHead, +_C.MODEL.ROI_BOX_HEAD.NAME = "" +# Options are: "smooth_l1", "giou" +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1" +# The final scaling coefficient on the box regression loss, used to balance the magnitude of its +# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`. +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0 +# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets +# These are empirically chosen to approximately lead to unit variance targets +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0 +_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" + +_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0 +# Hidden layer dimension for FC layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024 +_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0 +# Channel dimension for Conv layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. +# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_BOX_HEAD.NORM = "" +# Whether to use class agnostic for bbox regression +_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False +# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes. +_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False + +# ---------------------------------------------------------------------------- # +# Cascaded Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_CASCADE_HEAD = CN() +# The number of cascade stages is implicitly defined by the length of the following two configs. 
+_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = ( + (10.0, 10.0, 5.0, 5.0), + (20.0, 20.0, 10.0, 10.0), + (30.0, 30.0, 15.0, 15.0), +) +_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7) + + +# ---------------------------------------------------------------------------- # +# Mask Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_MASK_HEAD = CN() +_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead" +_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head +_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. +# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_MASK_HEAD.NORM = "" +# Whether to use class agnostic for mask prediction +_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2" + + +# ---------------------------------------------------------------------------- # +# Keypoint Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_KEYPOINT_HEAD = CN() +_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead" +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8)) +_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO. + +# Images with too few (or no) keypoints are excluded from training. +_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1 +# Normalize by the total number of visible keypoints in the minibatch if True. +# Otherwise, normalize by the total number of keypoints that could ever exist +# in the minibatch. +# The keypoint softmax loss is only calculated on visible keypoints. +# Since the number of visible keypoints can vary significantly between +# minibatches, this has the effect of up-weighting the importance of +# minibatches with few visible keypoints. (Imagine the extreme case of +# only one visible keypoint versus N: in the case of N, each one +# contributes 1/N to the gradient compared to the single keypoint +# determining the gradient direction). Instead, we can normalize the +# loss by the total number of keypoints, if it were the case that all +# keypoints were visible in a full minibatch. (Returning to the example, +# this means that the one visible keypoint contributes as much as each +# of the N keypoints.) +_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True +# Multi-task loss weight to use for keypoints +# Recommended values: +# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True +# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False +_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2" + +# ---------------------------------------------------------------------------- # +# Semantic Segmentation Head +# ---------------------------------------------------------------------------- # +_C.MODEL.SEM_SEG_HEAD = CN() +_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead" +_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] +# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for +# the correposnding pixel. 
+_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255 +# Number of classes in the semantic segmentation head +_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54 +# Number of channels in the 3x3 convs inside semantic-FPN heads. +_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128 +# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. +_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4 +# Normalization method for the convolution layers. Options: "" (no norm), "GN". +_C.MODEL.SEM_SEG_HEAD.NORM = "GN" +_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0 + +_C.MODEL.PANOPTIC_FPN = CN() +# Scaling of all losses from instance detection / segmentation head. +_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0 + +# options when combining instance & semantic segmentation outputs +_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used +_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5 +_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096 +_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5 + + +# ---------------------------------------------------------------------------- # +# RetinaNet Head +# ---------------------------------------------------------------------------- # +_C.MODEL.RETINANET = CN() + +# This is the number of foreground classes. +_C.MODEL.RETINANET.NUM_CLASSES = 80 + +_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] + +# Convolutions to use in the cls and bbox tower +# NOTE: this doesn't include the last conv for logits +_C.MODEL.RETINANET.NUM_CONVS = 4 + +# IoU overlap ratio [bg, fg] for labeling anchors. +# Anchors with < bg are labeled negative (0) +# Anchors with >= bg and < fg are ignored (-1) +# Anchors with >= fg are labeled positive (1) +_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5] +_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1] + +# Prior prob for rare case (i.e. foreground) at the beginning of training. +# This is used to set the bias for the logits layer of the classifier subnet. +# This improves training stability in the case of heavy class imbalance. 
+_C.MODEL.RETINANET.PRIOR_PROB = 0.01 + +# Inference cls score threshold, only anchors with score > INFERENCE_TH are +# considered for inference (to improve speed) +_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05 +# Select topk candidates before NMS +_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000 +_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5 + +# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets +_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) + +# Loss parameters +_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0 +_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25 +_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1 +# Options are: "smooth_l1", "giou" +_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1" + +# One of BN, SyncBN, FrozenBN, GN +# Only supports GN until unshared norm is implemented +_C.MODEL.RETINANET.NORM = "" + + +# ---------------------------------------------------------------------------- # +# ResNe[X]t options (ResNets = {ResNet, ResNeXt} +# Note that parts of a resnet may be used for both the backbone and the head +# These options apply to both +# ---------------------------------------------------------------------------- # +_C.MODEL.RESNETS = CN() + +_C.MODEL.RESNETS.DEPTH = 50 +_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone + +# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt +_C.MODEL.RESNETS.NUM_GROUPS = 1 + +# Options: FrozenBN, GN, "SyncBN", "BN" +_C.MODEL.RESNETS.NORM = "FrozenBN" + +# Baseline width of each group. +# Scaling this parameters will scale the width of all bottleneck layers. +_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 + +# Place the stride 2 conv on the 1x1 filter +# Use True only for the original MSRA ResNet; use False for C2 and Torch models +_C.MODEL.RESNETS.STRIDE_IN_1X1 = True + +# Apply dilation in stage "res5" +_C.MODEL.RESNETS.RES5_DILATION = 1 + +# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet +# For R18 and R34, this needs to be set to 64 +_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 +_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 + +# Apply Deformable Convolution in stages +# Specify if apply deform_conv on Res2, Res3, Res4, Res5 +_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False] +# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168); +# Use False for DeformableV1. +_C.MODEL.RESNETS.DEFORM_MODULATED = False +# Number of groups in deformable conv. +_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1 + + +# ---------------------------------------------------------------------------- # +# Solver +# ---------------------------------------------------------------------------- # +_C.SOLVER = CN() + +# See detectron2/solver/build.py for LR scheduler options +_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" + +_C.SOLVER.MAX_ITER = 40000 + +_C.SOLVER.BASE_LR = 0.001 + +_C.SOLVER.MOMENTUM = 0.9 + +_C.SOLVER.NESTEROV = False + +_C.SOLVER.WEIGHT_DECAY = 0.0001 +# The weight decay that's applied to parameters of normalization layers +# (typically the affine transformation) +_C.SOLVER.WEIGHT_DECAY_NORM = 0.0 + +_C.SOLVER.GAMMA = 0.1 +# The iteration number to decrease learning rate by GAMMA. +_C.SOLVER.STEPS = (30000,) + +_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000 +_C.SOLVER.WARMUP_ITERS = 1000 +_C.SOLVER.WARMUP_METHOD = "linear" + +# Save a checkpoint after every this number of iterations +_C.SOLVER.CHECKPOINT_PERIOD = 5000 + +# Number of images per batch across all machines. 
This is also the number +# of training images per step (i.e. per iteration). If we use 16 GPUs +# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch. +# May be adjusted automatically if REFERENCE_WORLD_SIZE is set. +_C.SOLVER.IMS_PER_BATCH = 16 + +# The reference number of workers (GPUs) this config is meant to train with. +# It takes no effect when set to 0. +# With a non-zero value, it will be used by DefaultTrainer to compute a desired +# per-worker batch size, and then scale the other related configs (total batch size, +# learning rate, etc) to match the per-worker batch size. +# See documentation of `DefaultTrainer.auto_scale_workers` for details: +_C.SOLVER.REFERENCE_WORLD_SIZE = 0 + +# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for +# biases. This is not useful (at least for recent models). You should avoid +# changing these and they exist only to reproduce Detectron v1 training if +# desired. +_C.SOLVER.BIAS_LR_FACTOR = 1.0 +_C.SOLVER.WEIGHT_DECAY_BIAS = _C.SOLVER.WEIGHT_DECAY + +# Gradient clipping +_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False}) +# Type of gradient clipping, currently 2 values are supported: +# - "value": the absolute values of elements of each gradients are clipped +# - "norm": the norm of the gradient for each parameter is clipped thus +# affecting all elements in the parameter +_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value" +# Maximum absolute value used for clipping gradients +_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0 +# Floating point number p for L-p norm to be used with the "norm" +# gradient clipping type; for L-inf, please specify .inf +_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0 + +# Enable automatic mixed precision for training +# Note that this does not change model's inference behavior. +# To use AMP in inference, run inference under autocast() +_C.SOLVER.AMP = CN({"ENABLED": False}) + +# ---------------------------------------------------------------------------- # +# Specific test options +# ---------------------------------------------------------------------------- # +_C.TEST = CN() +# For end-to-end tests to verify the expected accuracy. +# Each item is [task, metric, value, tolerance] +# e.g.: [['bbox', 'AP', 38.5, 0.2]] +_C.TEST.EXPECTED_RESULTS = [] +# The period (in terms of steps) to evaluate the model during training. +# Set to 0 to disable. +_C.TEST.EVAL_PERIOD = 0 +# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval +# When empty, it will use the defaults in COCO. +# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. +_C.TEST.KEYPOINT_OKS_SIGMAS = [] +# Maximum number of detections to return per image during inference (100 is +# based on the limit established for the COCO dataset). +_C.TEST.DETECTIONS_PER_IMAGE = 100 + +_C.TEST.AUG = CN({"ENABLED": False}) +_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) +_C.TEST.AUG.MAX_SIZE = 4000 +_C.TEST.AUG.FLIP = True + +_C.TEST.PRECISE_BN = CN({"ENABLED": False}) +_C.TEST.PRECISE_BN.NUM_ITER = 200 + +# ---------------------------------------------------------------------------- # +# Misc options +# ---------------------------------------------------------------------------- # +# Directory where output files are written +_C.OUTPUT_DIR = "./output" +# Set seed to negative to fully randomize everything. +# Set seed to positive to use a fixed seed. Note that a fixed seed increases +# reproducibility but does not guarantee fully deterministic behavior. 
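Taken together, the solver and test options above are usually overridden from a training script rather than edited in `defaults.py` itself. A hedged sketch of that pattern, assuming the vendored package is importable; the checkpoint path, YAML file, and values are illustrative only:

```python
# Illustration only.
from detectron2.config import get_cfg

cfg = get_cfg()
# cfg.merge_from_file("configs/my_experiment.yaml")   # optional: hypothetical YAML; VERSION may be omitted
cfg.MODEL.WEIGHTS = "checkpoints/ss/model_final.pth"  # illustrative path
cfg.SOLVER.IMS_PER_BATCH = 8    # total across all GPUs; per-GPU size = 8 // world_size
cfg.SOLVER.BASE_LR = 0.01
cfg.TEST.EVAL_PERIOD = 5000     # evaluate every 5000 iterations (0 disables)
cfg.SEED = 42                   # non-negative seed for better reproducibility
cfg.freeze()
```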
+# Disabling all parallelism further increases reproducibility. +_C.SEED = -1 +# Benchmark different cudnn algorithms. +# If input images have very different sizes, this option will have large overhead +# for about 10k iterations. It usually hurts total time, but can benefit for certain models. +# If input images have the same or similar sizes, benchmark is often helpful. +_C.CUDNN_BENCHMARK = False +# The period (in terms of steps) for minibatch visualization at train time. +# Set to 0 to disable. +_C.VIS_PERIOD = 0 + +# global config is for quick hack purposes. +# You can set them in command line or config files, +# and access it with: +# +# from detectron2.config import global_cfg +# print(global_cfg.HACK) +# +# Do not commit any configs into it. +_C.GLOBAL = CN() +_C.GLOBAL.HACK = 1.0 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/instantiate.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/instantiate.py new file mode 100644 index 0000000000000000000000000000000000000000..9e02a2c526445ba2aa18396181cee966c548dc12 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/config/instantiate.py @@ -0,0 +1,110 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import dataclasses +import logging +from collections import abc +from typing import Any +from omegaconf import DictConfig + +from detectron2.utils.registry import _convert_target_to_string, locate + +__all__ = ["dump_dataclass", "instantiate"] + + +def dump_dataclass(obj: Any): + """ + Dump a dataclass recursively into a dict that can be later instantiated. + + Args: + obj: a dataclass object + + Returns: + dict + """ + assert dataclasses.is_dataclass(obj) and not isinstance( + obj, type + ), "dump_dataclass() requires an instance of a dataclass." + ret = {"_target_": _convert_target_to_string(type(obj))} + for f in dataclasses.fields(obj): + v = getattr(obj, f.name) + if dataclasses.is_dataclass(v): + v = dump_dataclass(v) + if isinstance(v, (list, tuple)): + v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v] + ret[f.name] = v + return ret + + +def instantiate(cfg): + """ + Recursively instantiate objects defined in dictionaries by + "_target_" and arguments. + + Args: + cfg: a dict-like object with "_target_" that defines the caller, and + other keys that define the arguments + + Returns: + object instantiated by cfg + """ + from omegaconf import ListConfig + + if isinstance(cfg, ListConfig): + lst = [instantiate(x) for x in cfg] + return ListConfig(lst, flags={"allow_objects": True}) + if isinstance(cfg, list): + # Specialize for list, because many classes take + # list[objects] as arguments, such as ResNet, DatasetMapper + return [instantiate(x) for x in cfg] + + if isinstance(cfg, abc.Mapping) and "_target_" in cfg: + # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all, + # but faster: https://github.com/facebookresearch/hydra/issues/1200 + cfg = {k: instantiate(v) for k, v in cfg.items()} + cls = cfg.pop("_target_") + cls = instantiate(cls) + + if isinstance(cls, str): + cls_name = cls + cls = locate(cls_name) + assert cls is not None, cls_name + else: + try: + cls_name = cls.__module__ + "." 
+ cls.__qualname__ + except AttributeError: + # target could be anything, so the above could fail + cls_name = str(cls) + assert callable(cls), f"_target_ {cls} does not define a callable object" + try: + return cls(**cfg) + except TypeError: + logger = logging.getLogger(__name__) + logger.error(f"Error when instantiating {cls_name}!") + raise + return cfg # return as-is if don't know what to do + + +class LazyCall: + """ + Wrap a callable so that when it's called, the call will not be execued, + but returns a dict that describes the call. + + LazyCall object has to be called with only keyword arguments. Positional + arguments are not yet supported. + + Examples: + :: + layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32) + layer_cfg.out_channels = 64 + layer = instantiate(layer_cfg) + """ + + def __init__(self, target): + if not (callable(target) or isinstance(target, (str, abc.Mapping))): + raise TypeError( + "target of LazyCall must be a callable or defines a callable! Got {target}" + ) + self._target = target + + def __call__(self, **kwargs): + kwargs["_target_"] = self._target + return DictConfig(content=kwargs, flags={"allow_objects": True}) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..21c83f8cbd7a9388b452372f0444e78a54a33495 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from . import transforms # isort:skip + +from .build import ( + build_batch_data_loader, + build_detection_test_loader, + build_detection_train_loader, + get_detection_dataset_dicts, + load_proposals_into_dataset, + print_instances_class_histogram, +) +from .catalog import DatasetCatalog, MetadataCatalog, Metadata +from .common import DatasetFromList, MapDataset +from .dataset_mapper import DatasetMapper + +# ensure the builtin datasets are registered +from . import datasets, samplers # isort:skip + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/build.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/build.py new file mode 100644 index 0000000000000000000000000000000000000000..14dc1e769baf75352165fe00ff363023bdd10518 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/build.py @@ -0,0 +1,472 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import logging +import numpy as np +import operator +import pickle +import torch.utils.data +from tabulate import tabulate +from termcolor import colored + +from detectron2.config import configurable +from detectron2.structures import BoxMode +from detectron2.utils.comm import get_world_size +from detectron2.utils.env import seed_all_rng +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import _log_api_usage, log_first_n + +from .catalog import DatasetCatalog, MetadataCatalog +from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset +from .dataset_mapper import DatasetMapper +from .detection_utils import check_metadata_consistency +from .samplers import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler + +""" +This file contains the default logic to build a dataloader for training or testing. 
+""" + +__all__ = [ + "build_batch_data_loader", + "build_detection_train_loader", + "build_detection_test_loader", + "get_detection_dataset_dicts", + "load_proposals_into_dataset", + "print_instances_class_histogram", +] + + +def filter_images_with_only_crowd_annotations(dataset_dicts): + """ + Filter out images with none annotations or only crowd annotations + (i.e., images without non-crowd annotations). + A common training-time preprocessing on COCO dataset. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format, but filtered. + """ + num_before = len(dataset_dicts) + + def valid(anns): + for ann in anns: + if ann.get("iscrowd", 0) == 0: + return True + return False + + dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with no usable annotations. {} images left.".format( + num_before - num_after, num_after + ) + ) + return dataset_dicts + + +def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image): + """ + Filter out images with too few number of keypoints. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format as dataset_dicts, but filtered. + """ + num_before = len(dataset_dicts) + + def visible_keypoints_in_image(dic): + # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility + annotations = dic["annotations"] + return sum( + (np.array(ann["keypoints"][2::3]) > 0).sum() + for ann in annotations + if "keypoints" in ann + ) + + dataset_dicts = [ + x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image + ] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with fewer than {} keypoints.".format( + num_before - num_after, min_keypoints_per_image + ) + ) + return dataset_dicts + + +def load_proposals_into_dataset(dataset_dicts, proposal_file): + """ + Load precomputed object proposals into the dataset. + + The proposal file should be a pickled dict with the following keys: + + - "ids": list[int] or list[str], the image ids + - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id + - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores + corresponding to the boxes. + - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + proposal_file (str): file path of pre-computed proposals, in pkl format. + + Returns: + list[dict]: the same format as dataset_dicts, but added proposal field. + """ + logger = logging.getLogger(__name__) + logger.info("Loading proposals from: {}".format(proposal_file)) + + with PathManager.open(proposal_file, "rb") as f: + proposals = pickle.load(f, encoding="latin1") + + # Rename the key names in D1 proposal files + rename_keys = {"indexes": "ids", "scores": "objectness_logits"} + for key in rename_keys: + if key in proposals: + proposals[rename_keys[key]] = proposals.pop(key) + + # Fetch the indexes of all proposals that are in the dataset + # Convert image_id to str since they could be int. 
+ img_ids = set({str(record["image_id"]) for record in dataset_dicts}) + id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids} + + # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' + bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS + + for record in dataset_dicts: + # Get the index of the proposal + i = id_to_index[str(record["image_id"])] + + boxes = proposals["boxes"][i] + objectness_logits = proposals["objectness_logits"][i] + # Sort the proposals in descending order of the scores + inds = objectness_logits.argsort()[::-1] + record["proposal_boxes"] = boxes[inds] + record["proposal_objectness_logits"] = objectness_logits[inds] + record["proposal_bbox_mode"] = bbox_mode + + return dataset_dicts + + +def print_instances_class_histogram(dataset_dicts, class_names): + """ + Args: + dataset_dicts (list[dict]): list of dataset dicts. + class_names (list[str]): list of class names (zero-indexed). + """ + num_classes = len(class_names) + hist_bins = np.arange(num_classes + 1) + histogram = np.zeros((num_classes,), dtype=np.int) + for entry in dataset_dicts: + annos = entry["annotations"] + classes = np.asarray( + [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=np.int + ) + if len(classes): + assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}" + assert ( + classes.max() < num_classes + ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes" + histogram += np.histogram(classes, bins=hist_bins)[0] + + N_COLS = min(6, len(class_names) * 2) + + def short_name(x): + # make long class names shorter. useful for lvis + if len(x) > 13: + return x[:11] + ".." + return x + + data = list( + itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)]) + ) + total_num_instances = sum(data[1::2]) + data.extend([None] * (N_COLS - (len(data) % N_COLS))) + if num_classes > 1: + data.extend(["total", total_num_instances]) + data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + data, + headers=["category", "#instances"] * (N_COLS // 2), + tablefmt="pipe", + numalign="left", + stralign="center", + ) + log_first_n( + logging.INFO, + "Distribution of instances among all {} categories:\n".format(num_classes) + + colored(table, "cyan"), + key="message", + ) + + +def get_detection_dataset_dicts(names, filter_empty=True, min_keypoints=0, proposal_files=None): + """ + Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation. + + Args: + names (str or list[str]): a dataset name or a list of dataset names + filter_empty (bool): whether to filter out images without instance annotations + min_keypoints (int): filter out images with fewer keypoints than + `min_keypoints`. Set to 0 to do nothing. + proposal_files (list[str]): if given, a list of object proposal files + that match each dataset in `names`. + + Returns: + list[dict]: a list of dicts following the standard dataset dict format. 
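    Example (sketch), assuming ``"my_train_split"`` has previously been registered
    in ``DatasetCatalog`` (the name is a placeholder)::

        dicts = get_detection_dataset_dicts("my_train_split", filter_empty=True)
        print(len(dicts), dicts[0]["file_name"])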
+ """ + if isinstance(names, str): + names = [names] + assert len(names), names + dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names] + for dataset_name, dicts in zip(names, dataset_dicts): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + + if proposal_files is not None: + assert len(names) == len(proposal_files) + # load precomputed proposals from proposal files + dataset_dicts = [ + load_proposals_into_dataset(dataset_i_dicts, proposal_file) + for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files) + ] + + dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) + + has_instances = "annotations" in dataset_dicts[0] + if filter_empty and has_instances: + dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) + if min_keypoints > 0 and has_instances: + dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) + + if has_instances: + try: + class_names = MetadataCatalog.get(names[0]).thing_classes + check_metadata_consistency("thing_classes", names) + print_instances_class_histogram(dataset_dicts, class_names) + except AttributeError: # class names are not available for this dataset + pass + + assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names)) + return dataset_dicts + + +def build_batch_data_loader( + dataset, sampler, total_batch_size, *, aspect_ratio_grouping=False, num_workers=0 +): + """ + Build a batched dataloader for training. + + Args: + dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed. + sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices + total_batch_size, aspect_ratio_grouping, num_workers): see + :func:`build_detection_train_loader`. + + Returns: + iterable[list]. Length of each list is the batch size of the current + GPU. Each element in the list comes from the dataset. + """ + world_size = get_world_size() + assert ( + total_batch_size > 0 and total_batch_size % world_size == 0 + ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format( + total_batch_size, world_size + ) + + batch_size = total_batch_size // world_size + if aspect_ratio_grouping: + data_loader = torch.utils.data.DataLoader( + dataset, + sampler=sampler, + num_workers=num_workers, + batch_sampler=None, + collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements + worker_init_fn=worker_init_reset_seed, + ) # yield individual mapped dict + return AspectRatioGroupedDataset(data_loader, batch_size) + else: + batch_sampler = torch.utils.data.sampler.BatchSampler( + sampler, batch_size, drop_last=True + ) # drop_last so the batch always have the same size + return torch.utils.data.DataLoader( + dataset, + num_workers=num_workers, + batch_sampler=batch_sampler, + collate_fn=trivial_batch_collator, + worker_init_fn=worker_init_reset_seed, + ) + + +def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None): + if dataset is None: + dataset = get_detection_dataset_dicts( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON + else 0, + proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + _log_api_usage("dataset." 
+ cfg.DATASETS.TRAIN[0]) + + if mapper is None: + mapper = DatasetMapper(cfg, True) + + if sampler is None: + sampler_name = cfg.DATALOADER.SAMPLER_TRAIN + logger = logging.getLogger(__name__) + logger.info("Using training sampler {}".format(sampler_name)) + if sampler_name == "TrainingSampler": + sampler = TrainingSampler(len(dataset)) + elif sampler_name == "RepeatFactorTrainingSampler": + repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( + dataset, cfg.DATALOADER.REPEAT_THRESHOLD + ) + sampler = RepeatFactorTrainingSampler(repeat_factors) + else: + raise ValueError("Unknown training sampler: {}".format(sampler_name)) + + return { + "dataset": dataset, + "sampler": sampler, + "mapper": mapper, + "total_batch_size": cfg.SOLVER.IMS_PER_BATCH, + "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING, + "num_workers": cfg.DATALOADER.NUM_WORKERS, + } + + +# TODO can allow dataset as an iterable or IterableDataset to make this function more general +@configurable(from_config=_train_loader_from_config) +def build_detection_train_loader( + dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0 +): + """ + Build a dataloader for object detection with some default features. + This interface is experimental. + + Args: + dataset (list or torch.utils.data.Dataset): a list of dataset dicts, + or a map-style pytorch dataset. They can be obtained by using + :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. + mapper (callable): a callable which takes a sample (dict) from dataset and + returns the format to be consumed by the model. + When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``. + sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces + indices to be applied on ``dataset``. Default to :class:`TrainingSampler`, + which coordinates an infinite random shuffle sequence across all workers. + total_batch_size (int): total batch size across all workers. Batching + simply puts data into a list. + aspect_ratio_grouping (bool): whether to group images with similar + aspect ratio for efficiency. When enabled, it requires each + element in dataset be a dict with keys "width" and "height". + num_workers (int): number of parallel data loading workers + + Returns: + torch.utils.data.DataLoader: + a dataloader. Each output from it is a ``list[mapped_element]`` of length + ``total_batch_size / num_workers``, where ``mapped_element`` is produced + by the ``mapper``. + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: + dataset = MapDataset(dataset, mapper) + if sampler is None: + sampler = TrainingSampler(len(dataset)) + assert isinstance(sampler, torch.utils.data.sampler.Sampler) + return build_batch_data_loader( + dataset, + sampler, + total_batch_size, + aspect_ratio_grouping=aspect_ratio_grouping, + num_workers=num_workers, + ) + + +def _test_loader_from_config(cfg, dataset_name, mapper=None): + """ + Uses the given `dataset_name` argument (instead of the names in cfg), because the + standard practice is to evaluate each test set individually (not combining them). 
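    For reference, a sketch of calling ``build_detection_train_loader`` (documented
    above) with explicit arguments; ``cfg`` is assumed to be a detectron2 config and
    the dataset name is a placeholder that must already be registered::

        dicts = get_detection_dataset_dicts("my_train_split")
        loader = build_detection_train_loader(
            dicts,
            mapper=DatasetMapper(cfg, is_train=True),  # or any callable: dict -> model input
            total_batch_size=8,
        )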
+ """ + dataset = get_detection_dataset_dicts( + [dataset_name], + filter_empty=False, + proposal_files=[ + cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] + ] + if cfg.MODEL.LOAD_PROPOSALS + else None, + ) + if mapper is None: + mapper = DatasetMapper(cfg, False) + return {"dataset": dataset, "mapper": mapper, "num_workers": cfg.DATALOADER.NUM_WORKERS} + + +@configurable(from_config=_test_loader_from_config) +def build_detection_test_loader(dataset, *, mapper, sampler=None, num_workers=0): + """ + Similar to `build_detection_train_loader`, but uses a batch size of 1, + and :class:`InferenceSampler`. This sampler coordinates all workers to + produce the exact set of all samples. + This interface is experimental. + + Args: + dataset (list or torch.utils.data.Dataset): a list of dataset dicts, + or a map-style pytorch dataset. They can be obtained by using + :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. + mapper (callable): a callable which takes a sample (dict) from dataset + and returns the format to be consumed by the model. + When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``. + sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces + indices to be applied on ``dataset``. Default to :class:`InferenceSampler`, + which splits the dataset across all workers. + num_workers (int): number of parallel data loading workers + + Returns: + DataLoader: a torch DataLoader, that loads the given detection + dataset, with test-time transformation and batching. + + Examples: + :: + data_loader = build_detection_test_loader( + DatasetRegistry.get("my_test"), + mapper=DatasetMapper(...)) + + # or, instantiate with a CfgNode: + data_loader = build_detection_test_loader(cfg, "my_test") + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: + dataset = MapDataset(dataset, mapper) + if sampler is None: + sampler = InferenceSampler(len(dataset)) + # Always use 1 image per worker during inference since this is the + # standard when reporting inference time in papers. + batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) + data_loader = torch.utils.data.DataLoader( + dataset, + num_workers=num_workers, + batch_sampler=batch_sampler, + collate_fn=trivial_batch_collator, + ) + return data_loader + + +def trivial_batch_collator(batch): + """ + A batch collator that does nothing. + """ + return batch + + +def worker_init_reset_seed(worker_id): + initial_seed = torch.initial_seed() % 2 ** 31 + seed_all_rng(initial_seed + worker_id) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/catalog.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..45c110c19508f23921b9033cdaf0aa8056f0c125 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/catalog.py @@ -0,0 +1,236 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import types +from collections import UserDict +from typing import List + +from detectron2.utils.logger import log_first_n + +__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"] + + +class _DatasetCatalog(UserDict): + """ + A global dictionary that stores information about the datasets and how to obtain them. + + It contains a mapping from strings + (which are names that identify a dataset, e.g. 
"coco_2014_train") + to a function which parses the dataset and returns the samples in the + format of `list[dict]`. + + The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details) + if used with the data loader functionalities in `data/build.py,data/detection_transform.py`. + + The purpose of having this catalog is to make it easy to choose + different datasets, by just using the strings in the config. + """ + + def register(self, name, func): + """ + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + func (callable): a callable which takes no arguments and returns a list of dicts. + It must return the same results if called multiple times. + """ + assert callable(func), "You must register a function with `DatasetCatalog.register`!" + assert name not in self, "Dataset '{}' is already registered!".format(name) + self[name] = func + + def get(self, name): + """ + Call the registered function and return its results. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + + Returns: + list[dict]: dataset annotations. + """ + try: + f = self[name] + except KeyError as e: + raise KeyError( + "Dataset '{}' is not registered! Available datasets are: {}".format( + name, ", ".join(list(self.keys())) + ) + ) from e + return f() + + def list(self) -> List[str]: + """ + List all registered datasets. + + Returns: + list[str] + """ + return list(self.keys()) + + def remove(self, name): + """ + Alias of ``pop``. + """ + self.pop(name) + + def __str__(self): + return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys())) + + __repr__ = __str__ + + +DatasetCatalog = _DatasetCatalog() +DatasetCatalog.__doc__ = ( + _DatasetCatalog.__doc__ + + """ + .. automethod:: detectron2.data.catalog.DatasetCatalog.register + .. automethod:: detectron2.data.catalog.DatasetCatalog.get +""" +) + + +class Metadata(types.SimpleNamespace): + """ + A class that supports simple attribute setter/getter. + It is intended for storing metadata of a dataset and make it accessible globally. + + Examples: + :: + # somewhere when you load the data: + MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"] + + # somewhere when you print statistics or visualize: + classes = MetadataCatalog.get("mydataset").thing_classes + """ + + # the name of the dataset + # set default to N/A so that `self.name` in the errors will not trigger getattr again + name: str = "N/A" + + _RENAMED = { + "class_names": "thing_classes", + "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id", + "stuff_class_names": "stuff_classes", + } + + def __getattr__(self, key): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + return getattr(self, self._RENAMED[key]) + + # "name" exists in every metadata + if len(self.__dict__) > 1: + raise AttributeError( + "Attribute '{}' does not exist in the metadata of dataset '{}'. Available " + "keys are {}.".format(key, self.name, str(self.__dict__.keys())) + ) + else: + raise AttributeError( + f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': " + "metadata is empty." 
+ ) + + def __setattr__(self, key, val): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + setattr(self, self._RENAMED[key], val) + + # Ensure that metadata of the same name stays consistent + try: + oldval = getattr(self, key) + assert oldval == val, ( + "Attribute '{}' in the metadata of '{}' cannot be set " + "to a different value!\n{} != {}".format(key, self.name, oldval, val) + ) + except AttributeError: + super().__setattr__(key, val) + + def as_dict(self): + """ + Returns all the metadata as a dict. + Note that modifications to the returned dict will not reflect on the Metadata object. + """ + return copy.copy(self.__dict__) + + def set(self, **kwargs): + """ + Set multiple metadata with kwargs. + """ + for k, v in kwargs.items(): + setattr(self, k, v) + return self + + def get(self, key, default=None): + """ + Access an attribute and return its value if exists. + Otherwise return default. + """ + try: + return getattr(self, key) + except AttributeError: + return default + + +class _MetadataCatalog(UserDict): + """ + MetadataCatalog is a global dictionary that provides access to + :class:`Metadata` of a given dataset. + + The metadata associated with a certain name is a singleton: once created, the + metadata will stay alive and will be returned by future calls to ``get(name)``. + + It's like global variables, so don't abuse it. + It's meant for storing knowledge that's constant and shared across the execution + of the program, e.g.: the class names in COCO. + """ + + def get(self, name): + """ + Args: + name (str): name of a dataset (e.g. coco_2014_train). + + Returns: + Metadata: The :class:`Metadata` instance associated with this name, + or create an empty one if none is available. + """ + assert len(name) + r = super().get(name, None) + if r is None: + r = self[name] = Metadata(name=name) + return r + + def list(self): + """ + List all registered metadata. + + Returns: + list[str]: keys (names of datasets) of all registered metadata + """ + return list(self.keys()) + + def remove(self, name): + """ + Alias of ``pop``. + """ + self.pop(name) + + def __str__(self): + return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys())) + + __repr__ = __str__ + + +MetadataCatalog = _MetadataCatalog() +MetadataCatalog.__doc__ = ( + _MetadataCatalog.__doc__ + + """ + .. automethod:: detectron2.data.catalog.MetadataCatalog.get +""" +) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/common.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/common.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7d97c2860b63d7ff4686f2e86f00fe6e181a35 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/common.py @@ -0,0 +1,186 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import logging +import numpy as np +import pickle +import random +import torch.utils.data as data +from torch.utils.data.sampler import Sampler + +from detectron2.utils.serialize import PicklableWrapper + +__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"] + + +class MapDataset(data.Dataset): + """ + Map a function over the elements in a dataset. + + Args: + dataset: a dataset where map function is applied. + map_func: a callable which maps the element in dataset. 
map_func is + responsible for error handling, when error happens, it needs to + return None so the MapDataset will randomly use other + elements from the dataset. + """ + + def __init__(self, dataset, map_func): + self._dataset = dataset + self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work + + self._rng = random.Random(42) + self._fallback_candidates = set(range(len(dataset))) + + def __len__(self): + return len(self._dataset) + + def __getitem__(self, idx): + retry_count = 0 + cur_idx = int(idx) + + while True: + data = self._map_func(self._dataset[cur_idx]) + if data is not None: + self._fallback_candidates.add(cur_idx) + return data + + # _map_func fails for this idx, use a random new index from the pool + retry_count += 1 + self._fallback_candidates.discard(cur_idx) + cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] + + if retry_count >= 3: + logger = logging.getLogger(__name__) + logger.warning( + "Failed to apply `_map_func` for idx: {}, retry count: {}".format( + idx, retry_count + ) + ) + + +class DatasetFromList(data.Dataset): + """ + Wrap a list to a torch Dataset. It produces elements of the list as data. + """ + + def __init__(self, lst: list, copy: bool = True, serialize: bool = True): + """ + Args: + lst (list): a list which contains elements to produce. + copy (bool): whether to deepcopy the element when producing it, + so that the result can be modified in place without affecting the + source in the list. + serialize (bool): whether to hold memory using serialized objects, when + enabled, data loader workers can use shared RAM from master + process instead of making a copy. + """ + self._lst = lst + self._copy = copy + self._serialize = serialize + + def _serialize(data): + buffer = pickle.dumps(data, protocol=-1) + return np.frombuffer(buffer, dtype=np.uint8) + + if self._serialize: + logger = logging.getLogger(__name__) + logger.info( + "Serializing {} elements to byte tensors and concatenating them all ...".format( + len(self._lst) + ) + ) + self._lst = [_serialize(x) for x in self._lst] + self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64) + self._addr = np.cumsum(self._addr) + self._lst = np.concatenate(self._lst) + logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2)) + + def __len__(self): + if self._serialize: + return len(self._addr) + else: + return len(self._lst) + + def __getitem__(self, idx): + if self._serialize: + start_addr = 0 if idx == 0 else self._addr[idx - 1].item() + end_addr = self._addr[idx].item() + bytes = memoryview(self._lst[start_addr:end_addr]) + return pickle.loads(bytes) + elif self._copy: + return copy.deepcopy(self._lst[idx]) + else: + return self._lst[idx] + + +class ToIterableDataset(data.IterableDataset): + """ + Convert an old indices-based (also called map-style) dataset + to an iterable-style dataset. + """ + + def __init__(self, dataset, sampler): + """ + Args: + dataset (torch.utils.data.Dataset): an old-style dataset with ``__getitem__`` + sampler (torch.utils.data.sampler.Sampler): a cheap iterable that produces indices + to be applied on ``dataset``. 
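        A small sketch of how this composes with the wrappers above (the list
        content is a placeholder; the identity mapper is only for illustration)::

            from detectron2.data.samplers import TrainingSampler

            dicts = [{"width": 640, "height": 480, "idx": i} for i in range(10)]
            ds = DatasetFromList(dicts, copy=False)
            ds = MapDataset(ds, lambda d: d)   # lambdas are fine: MapDataset wraps them
            stream = ToIterableDataset(ds, TrainingSampler(len(ds), shuffle=False))
            it = iter(stream)                  # yields elements indefinitely, in order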
+ """ + assert not isinstance(dataset, data.IterableDataset), dataset + assert isinstance(sampler, Sampler), sampler + self.dataset = dataset + self.sampler = sampler + + def __iter__(self): + worker_info = data.get_worker_info() + if worker_info is None or worker_info.num_workers == 1: + for idx in self.sampler: + yield self.dataset[idx] + else: + # With map-style dataset, `DataLoader(dataset, sampler)` runs the + # sampler in main process only. But `DataLoader(ToIterableDataset(dataset, sampler))` + # will run sampler in every of the N worker and only keep 1/N of the ids on each + # worker. The assumption is that sampler is cheap to iterate and it's fine to discard + # ids in workers. + for idx in itertools.islice( + self.sampler, worker_info.id, None, worker_info.num_workers + ): + yield self.dataset[idx] + + +class AspectRatioGroupedDataset(data.IterableDataset): + """ + Batch data that have similar aspect ratio together. + In this implementation, images whose aspect ratio < (or >) 1 will + be batched together. + This improves training speed because the images then need less padding + to form a batch. + + It assumes the underlying dataset produces dicts with "width" and "height" keys. + It will then produce a list of original dicts with length = batch_size, + all with similar aspect ratios. + """ + + def __init__(self, dataset, batch_size): + """ + Args: + dataset: an iterable. Each element must be a dict with keys + "width" and "height", which will be used to batch data. + batch_size (int): + """ + self.dataset = dataset + self.batch_size = batch_size + self._buckets = [[] for _ in range(2)] + # Hard-coded two aspect ratio groups: w > h and w < h. + # Can add support for more aspect ratio groups, but doesn't seem useful + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + bucket_id = 0 if w > h else 1 + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_size: + yield bucket[:] + del bucket[:] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/dataset_mapper.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..55631fc6ab027a0a6d5d6b3c0b902a09a4a85efc --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/dataset_mapper.py @@ -0,0 +1,186 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import numpy as np +from typing import List, Optional, Union +import torch + +from detectron2.config import configurable + +from . import detection_utils as utils +from . import transforms as T + +""" +This file contains the default mapping that's applied to "dataset dicts". +""" + +__all__ = ["DatasetMapper"] + + +class DatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by the model. + + This is the default callable to be used to map your dataset dict into training data. + You may need to follow it to implement your own one for customized logic, + such as a different way to read or transform images. + See :doc:`/tutorials/data_loading` for details. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies cropping/geometric transforms to the image and annotations + 3. 
Prepare data and annotations to Tensor and :class:`Instances` + """ + + @configurable + def __init__( + self, + is_train: bool, + *, + augmentations: List[Union[T.Augmentation, T.Transform]], + image_format: str, + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + ): + """ + NOTE: this interface is experimental. + + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. + """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = T.AugmentationList(augmentations) + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + # fmt: on + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON + else: + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "recompute_boxes": recompute_boxes, + } + + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + # USER: Write your own image loading if it's not from a file + image = utils.read_image(dataset_dict["file_name"], format=self.image_format) + utils.check_image_size(dataset_dict, image) + + # USER: Remove if you don't do semantic/panoptic segmentation. 
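        # Note: when "sem_seg_file_name" is present, the semantic GT is read as a
        # single-channel ("L") image and transformed jointly with the RGB image via
        # the shared AugInput below.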
+ if "sem_seg_file_name" in dataset_dict: + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) + else: + sem_seg_gt = None + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + transforms = self.augmentations(aug_input) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + # USER: Remove if you don't use pre-computed proposals. + # Most users would not need this feature. + if self.proposal_topk is not None: + utils.transform_proposals( + dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk + ) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + dataset_dict.pop("sem_seg_file_name", None) + return dataset_dict + if "annotations" in dataset_dict: + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. + if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + dataset_dict["instances"] = utils.filter_empty_instances(instances) + return dataset_dict diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dbd92e8e2e1295d73e28f1eb2ed2368f368849a3 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .coco import load_coco_json, load_sem_seg, register_coco_instances +from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated +from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta +from .pascal_voc import load_voc_instances, register_pascal_voc +from . 
import builtin as _builtin # ensure the builtin datasets are registered + + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/builtin.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/builtin.py new file mode 100644 index 0000000000000000000000000000000000000000..eaacb44cfe65579cc3c466c5c94fb186c100c9a5 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/builtin.py @@ -0,0 +1,280 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + + +""" +This file registers pre-defined datasets at hard-coded paths, and their metadata. + +We hard-code metadata for common datasets. This will enable: +1. Consistency check when loading the datasets +2. Use models on these standard datasets directly and run demos, + without having to download the dataset annotations + +We hard-code some paths to the dataset that's assumed to +exist in "./datasets/". + +Users SHOULD NOT use this file to create new dataset / metadata for new dataset. +To add new dataset, refer to the tutorial "docs/DATASETS.md". +""" + +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog + +from .builtin_meta import ADE20K_SEM_SEG_CATEGORIES, _get_builtin_metadata +from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic +from .cityscapes_panoptic import register_all_cityscapes_panoptic +from .coco import load_sem_seg, register_coco_instances +from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated +from .lvis import get_lvis_instances_meta, register_lvis_instances +from .pascal_voc import register_pascal_voc + +# ==== Predefined datasets and splits for COCO ========== + +_PREDEFINED_SPLITS_COCO = {} +_PREDEFINED_SPLITS_COCO["coco"] = { + "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"), + "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), + "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"), + "coco_2014_minival_100": ("coco/val2014", "coco/annotations/instances_minival2014_100.json"), + "coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/instances_valminusminival2014.json", + ), + "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"), + "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"), + "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"), + "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"), + "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"), + "icdar_2015_train": ("coco/ic15_images", "annotations/icdar_2015.json"), + "icdar_2013_train": ("coco/ic13_images", "annotations/icdar_2013.json"), + "icdar_2017_mlt": ("icdar_2017_mlt", "annotations/icdar_2017_mlt.json"), + "icdar_2017_validation_mlt": ("icdar_2017_validation_mlt", "annotations/icdar_2017_validation_mlt.json"), + "icdar_curvesynthtext_train1": ("curve_text/emcs_imgs", "annotations/ecms_v1_maxlen25.json"), + "icdar_curvesynthtext_train2": ("curve_text/syntext_word_eng", "annotations/syntext_word_eng.json"), + "art": ("icdar2019_art_images", "annotations/icdar_2019_art_swints.json"), + "rects": ("icdar2019_rects_images", "annotations/icdar_2019_rects_swints.json"), + "lsvt": ("icdar2019_lsvt_images", "annotations/icdar_2019_lsvt_swints.json"), + "chn_syn": 
("chn_syn_images", "annotations/chn_syn.json"), + "totaltext_train": ("totaltext/totaltext_train_images", "totaltext/totaltext_train.json"), + "totaltext_test": ("totaltext/totaltext_test_images", "totaltext/totaltext_test.json"), + "vintext_train": ("fimotext/train_images", "fimotext/train.json"), + "vintext_test": ("fimotext/val_images", "fimotext/valid.json"), + "ctw1500_train": ("train2017", "annotations/instances_train2017.json"), + "ctw1500_test": ("ctwtest_text_image", "annotations/test_ctw1500_maxlen100.json"), +} + +_PREDEFINED_SPLITS_COCO["coco_person"] = { + "keypoints_coco_2014_train": ( + "coco/train2014", + "coco/annotations/person_keypoints_train2014.json", + ), + "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"), + "keypoints_coco_2014_minival": ( + "coco/val2014", + "coco/annotations/person_keypoints_minival2014.json", + ), + "keypoints_coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/person_keypoints_valminusminival2014.json", + ), + "keypoints_coco_2014_minival_100": ( + "coco/val2014", + "coco/annotations/person_keypoints_minival2014_100.json", + ), + "keypoints_coco_2017_train": ( + "coco/train2017", + "coco/annotations/person_keypoints_train2017.json", + ), + "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"), + "keypoints_coco_2017_val_100": ( + "coco/val2017", + "coco/annotations/person_keypoints_val2017_100.json", + ), +} + + +_PREDEFINED_SPLITS_COCO_PANOPTIC = { + "coco_2017_train_panoptic": ( + # This is the original panoptic annotation directory + "coco/panoptic_train2017", + "coco/annotations/panoptic_train2017.json", + # This directory contains semantic annotations that are + # converted from panoptic annotations. + # It is used by PanopticFPN. + # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py + # to create these directories. + "coco/panoptic_stuff_train2017", + ), + "coco_2017_val_panoptic": ( + "coco/panoptic_val2017", + "coco/annotations/panoptic_val2017.json", + "coco/panoptic_stuff_val2017", + ), + "coco_2017_val_100_panoptic": ( + "coco/panoptic_val2017_100", + "coco/annotations/panoptic_val2017_100.json", + "coco/panoptic_stuff_val2017_100", + ), +} + + +def register_all_coco(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + # Assume pre-defined datasets live in `./datasets`. + register_coco_instances( + key, + _get_builtin_metadata(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + for ( + prefix, + (panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): + prefix_instances = prefix[: -len("_panoptic")] + instances_meta = MetadataCatalog.get(prefix_instances) + image_root, instances_json = instances_meta.image_root, instances_meta.json_file + # The "separated" version of COCO panoptic segmentation dataset, + # e.g. used by Panoptic FPN + register_coco_panoptic_separated( + prefix, + _get_builtin_metadata("coco_panoptic_separated"), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + os.path.join(root, semantic_root), + instances_json, + ) + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. 
used by Panoptic-DeepLab + register_coco_panoptic( + prefix, + _get_builtin_metadata("coco_panoptic_standard"), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + instances_json, + ) + + +# ==== Predefined datasets and splits for LVIS ========== + + +_PREDEFINED_SPLITS_LVIS = { + "lvis_v1": { + "lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"), + "lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"), + "lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"), + "lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"), + }, + "lvis_v0.5": { + "lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"), + "lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"), + "lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"), + "lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"), + }, + "lvis_v0.5_cocofied": { + "lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"), + "lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"), + }, +} + + +def register_all_lvis(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + register_lvis_instances( + key, + get_lvis_instances_meta(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +# ==== Predefined splits for raw cityscapes images =========== +_RAW_CITYSCAPES_SPLITS = { + "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train/", "cityscapes/gtFine/train/"), + "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val/", "cityscapes/gtFine/val/"), + "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test/", "cityscapes/gtFine/test/"), +} + + +def register_all_cityscapes(root): + for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): + meta = _get_builtin_metadata("cityscapes") + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + + inst_key = key.format(task="instance_seg") + DatasetCatalog.register( + inst_key, + lambda x=image_dir, y=gt_dir: load_cityscapes_instances( + x, y, from_json=True, to_polygons=True + ), + ) + MetadataCatalog.get(inst_key).set( + image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta + ) + + sem_key = key.format(task="sem_seg") + DatasetCatalog.register( + sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y) + ) + MetadataCatalog.get(sem_key).set( + image_dir=image_dir, + gt_dir=gt_dir, + evaluator_type="cityscapes_sem_seg", + ignore_label=255, + **meta, + ) + + +# ==== Predefined splits for PASCAL VOC =========== +def register_all_pascal_voc(root): + SPLITS = [ + ("voc_2007_trainval", "VOC2007", "trainval"), + ("voc_2007_train", "VOC2007", "train"), + ("voc_2007_val", "VOC2007", "val"), + ("voc_2007_test", "VOC2007", "test"), + ("voc_2012_trainval", "VOC2012", "trainval"), + ("voc_2012_train", "VOC2012", "train"), + ("voc_2012_val", "VOC2012", "val"), + ] + for name, dirname, split in SPLITS: + year = 2007 if "2007" in name else 2012 + register_pascal_voc(name, os.path.join(root, dirname), split, year) + MetadataCatalog.get(name).evaluator_type = "pascal_voc" + + +def register_all_ade20k(root): + root = os.path.join(root, "ADEChallengeData2016") + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_detectron2", 
dirname) + name = f"ade20k_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=ADE20K_SEM_SEG_CATEGORIES[:], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + ) + + +# True for open source; +# Internally at fb, we register them elsewhere +if __name__.endswith(".builtin"): + # Assume pre-defined datasets live in `./datasets`. + _root = os.getenv("DETECTRON2_DATASETS", "datasets") + register_all_coco(_root) + register_all_lvis(_root) + register_all_cityscapes(_root) + register_all_cityscapes_panoptic(_root) + register_all_pascal_voc(_root) + register_all_ade20k(_root) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/builtin_meta.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/builtin_meta.py new file mode 100644 index 0000000000000000000000000000000000000000..9e0c7a8f52a014b997cd3764805680849d6f45ff --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/builtin_meta.py @@ -0,0 +1,350 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +Note: +For your custom dataset, there is no need to hard-code metadata anywhere in the code. +For example, for COCO-format dataset, metadata will be obtained automatically +when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways +during loading. + +However, we hard-coded metadata for a few common dataset here. +The only goal is to allow users who don't have these dataset to use pre-trained models. +Users don't have to download a COCO json (which contains metadata), in order to visualize a +COCO model (with correct class names and colors). 
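For a custom COCO-format dataset, the usual route is ``register_coco_instances``
together with ``MetadataCatalog`` rather than editing this file. A sketch with
placeholder names and paths::

    from detectron2.data import MetadataCatalog
    from detectron2.data.datasets import register_coco_instances

    register_coco_instances(
        "signboard_train",                  # hypothetical dataset name
        {},                                 # extra metadata, may be empty
        "datasets/signboard/train.json",    # hypothetical COCO-format annotation file
        "datasets/signboard/train_images",  # hypothetical image root
    )
    # thing_classes etc. are filled in from the json when the dataset is first loaded
    meta = MetadataCatalog.get("signboard_train")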
+""" + + +# All coco categories, together with their nice-looking visualization colors +# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "text"}, + # {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + # {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + # {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + # {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + # {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + # {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + # {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + # {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + # {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + # {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + # {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + # {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + # {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + # {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + # {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + # {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + # {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + # {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + # {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + # {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + # {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + # {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + # {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + # {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + # {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + # {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + # {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + # {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + # {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + # {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + # {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + # {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + # {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + # {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + # {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + # {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + # {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + # {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + # {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + # {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + # {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + # {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + # {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + # {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + # {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + # 
{"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + # {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + # {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + # {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + # {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + # {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + # {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + # {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + # {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + # {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + # {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + # {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + # {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + # {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + # {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + # {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + # {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + # {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + # {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + # {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + # {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + # {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + # {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + # {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + # {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + # {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + # {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + # {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + # {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + # {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + # {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + # {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + # {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + # {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + # {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + # {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + # {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + # {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + # {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + # {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + # {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + # {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + # {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + # {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + # {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + # {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + # {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + # {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + # {"color": 
[193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + # {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + # {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"}, + # {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + # {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + # {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + # {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + # {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + # {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + # {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + # {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + # {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + # {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + # {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + # {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + # {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + # {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + # {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + # {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + # {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + # {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + # {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + # {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + # {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + # {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + # {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + # {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + # {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + # {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + # {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + # {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + # {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + # {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + # {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + # {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + # {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + # {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + # {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + # {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +# fmt: off +COCO_PERSON_KEYPOINT_NAMES = ( + "nose", + "left_eye", "right_eye", + "left_ear", "right_ear", + "left_shoulder", "right_shoulder", + "left_elbow", "right_elbow", + "left_wrist", "right_wrist", + "left_hip", "right_hip", + "left_knee", "right_knee", + "left_ankle", "right_ankle", +) +# fmt: on + +# Pairs of keypoints that should be exchanged under horizontal flipping +COCO_PERSON_KEYPOINT_FLIP_MAP = ( + ("left_eye", "right_eye"), + ("left_ear", "right_ear"), + ("left_shoulder", "right_shoulder"), + 
("left_elbow", "right_elbow"), + ("left_wrist", "right_wrist"), + ("left_hip", "right_hip"), + ("left_knee", "right_knee"), + ("left_ankle", "right_ankle"), +) + +# rules for pairs of keypoints to draw a line between, and the line color to use. +KEYPOINT_CONNECTION_RULES = [ + # face + ("left_ear", "left_eye", (102, 204, 255)), + ("right_ear", "right_eye", (51, 153, 255)), + ("left_eye", "nose", (102, 0, 204)), + ("nose", "right_eye", (51, 102, 255)), + # upper-body + ("left_shoulder", "right_shoulder", (255, 128, 0)), + ("left_shoulder", "left_elbow", (153, 255, 204)), + ("right_shoulder", "right_elbow", (128, 229, 255)), + ("left_elbow", "left_wrist", (153, 255, 153)), + ("right_elbow", "right_wrist", (102, 255, 224)), + # lower-body + ("left_hip", "right_hip", (255, 102, 0)), + ("left_hip", "left_knee", (255, 255, 77)), + ("right_hip", "right_knee", (153, 255, 204)), + ("left_knee", "left_ankle", (191, 255, 128)), + ("right_knee", "right_ankle", (255, 195, 77)), +] + +# All Cityscapes categories, together with their nice-looking visualization colors +# It's from https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py # noqa +CITYSCAPES_CATEGORIES = [ + {"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"}, + {"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"}, + {"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"}, + {"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"}, + {"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"}, + {"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"}, + {"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"}, + {"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"}, + {"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"}, + {"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"}, + {"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"}, + {"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"}, + {"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"}, + {"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"}, + {"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"}, + {"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"}, + {"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"}, + {"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"}, + {"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"}, +] + +# fmt: off +ADE20K_SEM_SEG_CATEGORIES = [ + "wall", "building", "sky", "floor", "tree", "ceiling", "road, route", "bed", "window ", "grass", "cabinet", "sidewalk, pavement", "person", "earth, ground", "door", "table", "mountain, mount", "plant", "curtain", "chair", "car", "water", "painting, picture", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock, stone", "wardrobe, closet, press", "lamp", "tub", "rail", "cushion", "base, pedestal, stand", "box", "column, pillar", "signboard, sign", "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator, icebox", "grandstand, covered stand", "path", "stairs", "runway", 
"case, display case, showcase, vitrine", "pool table, billiard table, snooker table", "pillow", "screen door, screen", "stairway, staircase", "river", "bridge, span", "bookcase", "blind, screen", "coffee table", "toilet, can, commode, crapper, pot, potty, stool, throne", "flower", "book", "hill", "bench", "countertop", "stove", "palm, palm tree", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel, hut, hutch, shack, shanty", "bus", "towel", "light", "truck", "tower", "chandelier", "awning, sunshade, sunblind", "street lamp", "booth", "tv", "plane", "dirt track", "clothes", "pole", "land, ground, soil", "bannister, banister, balustrade, balusters, handrail", "escalator, moving staircase, moving stairway", "ottoman, pouf, pouffe, puff, hassock", "bottle", "buffet, counter, sideboard", "poster, posting, placard, notice, bill, card", "stage", "van", "ship", "fountain", "conveyer belt, conveyor belt, conveyer, conveyor, transporter", "canopy", "washer, automatic washer, washing machine", "plaything, toy", "pool", "stool", "barrel, cask", "basket, handbasket", "falls", "tent", "bag", "minibike, motorbike", "cradle", "oven", "ball", "food, solid food", "step, stair", "tank, storage tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", "traffic light", "tray", "trash can", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass, drinking glass", "clock", "flag", # noqa +] +# After processed by `prepare_ade20k_sem_seg.py`, id 255 means ignore +# fmt: on + + +def _get_coco_instances_meta(): + thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] + # assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous COCO category id to an id in [0, 79] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + "thing_colors": thing_colors, + } + return ret + + +def _get_coco_panoptic_separated_meta(): + """ + Returns metadata for "separated" version of the panoptic segmentation dataset. + """ + stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0] + # assert len(stuff_ids) == 53, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 53], used in models) to ids in the dataset (used for processing results) + # The id 0 is mapped to an extra category "thing". 
+ stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)} + # When converting COCO panoptic annotations to semantic annotations + # We label the "thing" category to 0 + stuff_dataset_id_to_contiguous_id[0] = 0 + + # 54 names for COCO stuff categories (including "things") + stuff_classes = ["things"] + [ + k["name"].replace("-other", "").replace("-merged", "") + for k in COCO_CATEGORIES + if k["isthing"] == 0 + ] + + # NOTE: I randomly picked a color for things + stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0] + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + "stuff_colors": stuff_colors, + } + ret.update(_get_coco_instances_meta()) + return ret + + +def _get_builtin_metadata(dataset_name): + if dataset_name == "coco": + return _get_coco_instances_meta() + if dataset_name == "coco_panoptic_separated": + return _get_coco_panoptic_separated_meta() + elif dataset_name == "coco_panoptic_standard": + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in COCO_CATEGORIES] + thing_colors = [k["color"] for k in COCO_CATEGORIES] + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + stuff_colors = [k["color"] for k in COCO_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. 
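+        # Descriptive note (added): the loop below simply enumerates COCO_CATEGORIES,
+        # so a category's contiguous id is its position in that list (first entry -> 0,
+        # second -> 1, ...), regardless of whether it is a "thing" or a "stuff" class.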
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + else: + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + elif dataset_name == "coco_person": + return { + "thing_classes": ["person"], + "keypoint_names": COCO_PERSON_KEYPOINT_NAMES, + "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP, + "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES, + } + elif dataset_name == "cityscapes": + # fmt: off + CITYSCAPES_THING_CLASSES = [ + "person", "rider", "car", "truck", + "bus", "train", "motorcycle", "bicycle", + ] + CITYSCAPES_STUFF_CLASSES = [ + "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", + "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car", + "truck", "bus", "train", "motorcycle", "bicycle", + ] + # fmt: on + return { + "thing_classes": CITYSCAPES_THING_CLASSES, + "stuff_classes": CITYSCAPES_STUFF_CLASSES, + } + raise KeyError("No built-in metadata for dataset {}".format(dataset_name)) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/cityscapes.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1e84a5bdb3d4e410d8eef4b80a5d4c099a180104 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/cityscapes.py @@ -0,0 +1,329 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import functools +import json +import logging +import multiprocessing as mp +import numpy as np +import os +from itertools import chain +import pycocotools.mask as mask_util +from PIL import Image + +from detectron2.structures import BoxMode +from detectron2.utils.comm import get_world_size +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import setup_logger + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + + +logger = logging.getLogger(__name__) + + +def _get_cityscapes_files(image_dir, gt_dir): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + for city in cities: + city_img_dir = os.path.join(image_dir, city) + city_gt_dir = os.path.join(gt_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = basename[: -len(suffix)] + + instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") + label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") + json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") + + files.append((image_file, instance_file, label_file, json_file)) + assert len(files), "No images found in {}".format(image_dir) + for f in files[0]: + assert PathManager.isfile(f), f + return files + + +def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". 
+ from_json (bool): whether to read annotations from the raw json file or the png files. + to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + if from_json: + assert to_polygons, ( + "Cityscapes's json annotations are in polygon format. " + "Converting to mask format is not supported now." + ) + files = _get_cityscapes_files(image_dir, gt_dir) + + logger.info("Preprocessing cityscapes annotations ...") + # This is still not fast: all workers will execute duplicate works and will + # take up to 10m on a 8GPU server. + pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) + + ret = pool.map( + functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), + files, + ) + logger.info("Loaded {} images from {}".format(len(ret), image_dir)) + + # Map cityscape ids to contiguous ids + from cityscapesscripts.helpers.labels import labels + + labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] + dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} + for dict_per_image in ret: + for anno in dict_per_image["annotations"]: + anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] + return ret + + +def load_cityscapes_semantic(image_dir, gt_dir): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". + + Returns: + list[dict]: a list of dict, each has "file_name" and + "sem_seg_file_name". + """ + ret = [] + # gt_dir is small and contain many small files. make sense to fetch to local first + gt_dir = PathManager.get_local_path(gt_dir) + for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir): + label_file = label_file.replace("labelIds", "labelTrainIds") + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret.append( + { + "file_name": image_file, + "sem_seg_file_name": label_file, + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + return ret + + +def _cityscapes_files_to_dict(files, from_json, to_polygons): + """ + Parse cityscapes annotation files to a instance segmentation dataset dict. + + Args: + files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) + from_json (bool): whether to read annotations from the raw json file or the png files. + to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + A dict in Detectron2 Dataset format. + """ + from cityscapesscripts.helpers.labels import id2label, name2label + + image_file, instance_id_file, _, json_file = files + + annos = [] + + if from_json: + from shapely.geometry import MultiPolygon, Polygon + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + + # `polygons_union` contains the union of all valid polygons. 
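+        # Descriptive note (added): shapely's Polygon() with no arguments is an empty
+        # geometry, so the union below simply accumulates valid polygons as the
+        # reversed object list is processed.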
+ polygons_union = Polygon() + + # CityscapesScripts draw the polygons in sequential order + # and each polygon *overwrites* existing ones. See + # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa + # We use reverse order, and each polygon *avoids* early ones. + # This will resolve the ploygon overlaps in the same way as CityscapesScripts. + for obj in jsonobj["objects"][::-1]: + if "deleted" in obj: # cityscapes data format specific + continue + label_name = obj["label"] + + try: + label = name2label[label_name] + except KeyError: + if label_name.endswith("group"): # crowd area + label = name2label[label_name[: -len("group")]] + else: + raise + if label.id < 0: # cityscapes data format + continue + + # Cityscapes's raw annotations uses integer coordinates + # Therefore +0.5 here + poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 + # CityscapesScript uses PIL.ImageDraw.polygon to rasterize + # polygons for evaluation. This function operates in integer space + # and draws each pixel whose center falls into the polygon. + # Therefore it draws a polygon which is 0.5 "fatter" in expectation. + # We therefore dilate the input polygon by 0.5 as our input. + poly = Polygon(poly_coord).buffer(0.5, resolution=4) + + if not label.hasInstances or label.ignoreInEval: + # even if we won't store the polygon it still contributes to overlaps resolution + polygons_union = polygons_union.union(poly) + continue + + # Take non-overlapping part of the polygon + poly_wo_overlaps = poly.difference(polygons_union) + if poly_wo_overlaps.is_empty: + continue + polygons_union = polygons_union.union(poly) + + anno = {} + anno["iscrowd"] = label_name.endswith("group") + anno["category_id"] = label.id + + if isinstance(poly_wo_overlaps, Polygon): + poly_list = [poly_wo_overlaps] + elif isinstance(poly_wo_overlaps, MultiPolygon): + poly_list = poly_wo_overlaps.geoms + else: + raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) + + poly_coord = [] + for poly_el in poly_list: + # COCO API can work only with exterior boundaries now, hence we store only them. + # TODO: store both exterior and interior boundaries once other parts of the + # codebase support holes in polygons. 
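+                # Descriptive note (added): exterior.coords is a ring of (x, y) pairs, so
+                # chain(*...) flattens it into the COCO polygon format [x0, y0, x1, y1, ...]
+                # expected by the "segmentation" field.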
+ poly_coord.append(list(chain(*poly_el.exterior.coords))) + anno["segmentation"] = poly_coord + (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds + + anno["bbox"] = (xmin, ymin, xmax, ymax) + anno["bbox_mode"] = BoxMode.XYXY_ABS + + annos.append(anno) + else: + # See also the official annotation parsing scripts at + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa + with PathManager.open(instance_id_file, "rb") as f: + inst_image = np.asarray(Image.open(f), order="F") + # ids < 24 are stuff labels (filtering them first is about 5% faster) + flattened_ids = np.unique(inst_image[inst_image >= 24]) + + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": inst_image.shape[0], + "width": inst_image.shape[1], + } + + for instance_id in flattened_ids: + # For non-crowd annotations, instance_id // 1000 is the label_id + # Crowd annotations have <1000 instance ids + label_id = instance_id // 1000 if instance_id >= 1000 else instance_id + label = id2label[label_id] + if not label.hasInstances or label.ignoreInEval: + continue + + anno = {} + anno["iscrowd"] = instance_id < 1000 + anno["category_id"] = label.id + + mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") + + inds = np.nonzero(mask) + ymin, ymax = inds[0].min(), inds[0].max() + xmin, xmax = inds[1].min(), inds[1].max() + anno["bbox"] = (xmin, ymin, xmax, ymax) + if xmax <= xmin or ymax <= ymin: + continue + anno["bbox_mode"] = BoxMode.XYXY_ABS + if to_polygons: + # This conversion comes from D4809743 and D5171122, + # when Mask-RCNN was first developed. + contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ + -2 + ] + polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] + # opencv's can produce invalid polygons + if len(polygons) == 0: + continue + anno["segmentation"] = polygons + else: + anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] + annos.append(anno) + ret["annotations"] = annos + return ret + + +if __name__ == "__main__": + """ + Test the cityscapes dataset loader. 
+ + Usage: + python -m detectron2.data.datasets.cityscapes \ + cityscapes/leftImg8bit/train cityscapes/gtFine/train + """ + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("image_dir") + parser.add_argument("gt_dir") + parser.add_argument("--type", choices=["instance", "semantic"], default="instance") + args = parser.parse_args() + from detectron2.data.catalog import Metadata + from detectron2.utils.visualizer import Visualizer + from cityscapesscripts.helpers.labels import labels + + logger = setup_logger(name=__name__) + + dirname = "cityscapes-data-vis" + os.makedirs(dirname, exist_ok=True) + + if args.type == "instance": + dicts = load_cityscapes_instances( + args.image_dir, args.gt_dir, from_json=True, to_polygons=True + ) + logger.info("Done loading {} samples.".format(len(dicts))) + + thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] + meta = Metadata().set(thing_classes=thing_classes) + + else: + dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) + logger.info("Done loading {} samples.".format(len(dicts))) + + stuff_classes = [k.name for k in labels if k.trainId != 255] + stuff_colors = [k.color for k in labels if k.trainId != 255] + meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors) + + for d in dicts: + img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + # cv2.imshow("a", vis.get_image()[:, :, ::-1]) + # cv2.waitKey() + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/cityscapes_panoptic.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/cityscapes_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..48c136f1623261b079591065fec7c7fc38165076 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/cityscapes_panoptic.py @@ -0,0 +1,187 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import json +import logging +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets.builtin_meta import CITYSCAPES_CATEGORIES +from detectron2.utils.file_io import PathManager + +""" +This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog. 
+""" + + +logger = logging.getLogger(__name__) + + +def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + image_dict = {} + for city in cities: + city_img_dir = os.path.join(image_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "_leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = os.path.basename(basename)[: -len(suffix)] + + image_dict[basename] = image_file + + for ann in json_info["annotations"]: + image_file = image_dict.get(ann["image_id"], None) + assert image_file is not None, "No image {} found for annotation {}".format( + ann["image_id"], ann["file_name"] + ) + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = ann["segments_info"] + + files.append((image_file, label_file, segments_info)) + + assert len(files), "No images found in {}".format(image_dir) + assert PathManager.isfile(files[0][0]), files[0][0] + assert PathManager.isfile(files[0][1]), files[0][1] + return files + + +def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train". + gt_json (str): path to the json file. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train.json". + meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id" + and "stuff_dataset_id_to_contiguous_id" to map category ids to + contiguous ids for training. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + return segment_info + + assert os.path.exists( + gt_json + ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa + with open(gt_json) as f: + json_info = json.load(f) + files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info) + ret = [] + for image_file, label_file, segments_info in files: + sem_label_file = ( + image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png" + ) + segments_info = [_convert_category_id(x, meta) for x in segments_info] + ret.append( + { + "file_name": image_file, + "image_id": "_".join( + os.path.splitext(os.path.basename(image_file))[0].split("_")[:3] + ), + "sem_seg_file_name": sem_label_file, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" 
+ assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + assert PathManager.isfile( + ret[0]["pan_seg_file_name"] + ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa + return ret + + +_RAW_CITYSCAPES_PANOPTIC_SPLITS = { + "cityscapes_fine_panoptic_train": ( + "cityscapes/leftImg8bit/train", + "cityscapes/gtFine/cityscapes_panoptic_train", + "cityscapes/gtFine/cityscapes_panoptic_train.json", + ), + "cityscapes_fine_panoptic_val": ( + "cityscapes/leftImg8bit/val", + "cityscapes/gtFine/cityscapes_panoptic_val", + "cityscapes/gtFine/cityscapes_panoptic_val.json", + ), + # "cityscapes_fine_panoptic_test": not supported yet +} + + +def register_all_cityscapes_panoptic(root): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # There are three types of ids in cityscapes panoptic segmentation: + # (1) category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the classifier + # (2) instance id: this id is used to differentiate different instances from + # the same category. For "stuff" classes, the instance id is always 0; for + # "thing" classes, the instance id starts from 1 and 0 is reserved for + # ignored instances (e.g. crowd annotation). + # (3) panoptic id: this is the compact id that encode both category and + # instance id by: category_id * 1000 + instance_id. 
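+    # Worked example (added for illustration): the 5th "car" instance in an image
+    # (raw category id 26) gets panoptic id 26 * 1000 + 5 = 26005, while every "road"
+    # pixel (stuff, raw category id 7) is encoded as 7 * 1000 + 0 = 7000.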
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for k in CITYSCAPES_CATEGORIES: + if k["isthing"] == 1: + thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + else: + stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items(): + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + gt_json = os.path.join(root, gt_json) + + DatasetCatalog.register( + key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta) + ) + MetadataCatalog.get(key).set( + panoptic_root=gt_dir, + image_root=image_dir, + panoptic_json=gt_json, + gt_dir=gt_dir.replace("cityscapes_panoptic_", ""), + evaluator_type="cityscapes_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **meta, + ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/coco.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..4820f008afefeb4f61285063076b2f1bd7228b38 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/coco.py @@ -0,0 +1,532 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import contextlib +import datetime +import io +import json +import logging +import numpy as np +import os +import shutil +import pycocotools.mask as mask_util +from fvcore.common.timer import Timer +from iopath.common.file_io import file_lock +from PIL import Image + +from detectron2.structures import Boxes, BoxMode, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .. import DatasetCatalog, MetadataCatalog + +""" +This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format". +""" + + +logger = logging.getLogger(__name__) + +__all__ = ["load_coco_json", "load_sem_seg", "convert_to_coco_json", "register_coco_instances"] + + +def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): + """ + Load a json file with COCO's instances annotation format. + Currently supports instance detection, instance segmentation, + and person keypoints annotations. + + Args: + json_file (str): full path to the json file in COCO instances annotation format. + image_root (str or path-like): the directory where the images in this json file exists. + dataset_name (str or None): the name of the dataset (e.g., coco_2017_train). + When provided, this function will also do the following: + + * Put "thing_classes" into the metadata associated with this dataset. + * Map the category ids into a contiguous range (needed by standard dataset format), + and add "thing_dataset_id_to_contiguous_id" to the metadata associated + with this dataset. + + This option should usually be provided, unless users need to load + the original json content and apply more processing manually. + extra_annotation_keys (list[str]): list of per-annotation keys that should also be + loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", + "category_id", "segmentation"). The values for these keys will be returned as-is. + For example, the densepose annotations are loaded in this way. 
+ + Returns: + list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See + `Using Custom Datasets `_ ) when `dataset_name` is not None. + If `dataset_name` is None, the returned `category_ids` may be + incontiguous and may not conform to the Detectron2 standard format. + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from pycocotools.coco import COCO + + timer = Timer() + json_file = PathManager.get_local_path(json_file) + with contextlib.redirect_stdout(io.StringIO()): + coco_api = COCO(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + id_map = None + if dataset_name is not None: + meta = MetadataCatalog.get(dataset_name) + cat_ids = sorted(coco_api.getCatIds()) + cats = coco_api.loadCats(cat_ids) + # The categories in a custom json file may not be sorted. + thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] + meta.thing_classes = thing_classes + + # In COCO, certain category ids are artificially removed, + # and by convention they are always ignored. + # We deal with COCO's id issue and translate + # the category ids to contiguous ids in [0, 80). + + # It works by looking at the "categories" field in the json, therefore + # if users' own json also have incontiguous ids, we'll + # apply this mapping as well but print a warning. + if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): + if "coco" not in dataset_name: + logger.warning( + """ +Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. +""" + ) + id_map = {v: i for i, v in enumerate(cat_ids)} + meta.thing_dataset_id_to_contiguous_id = id_map + + # sort indices for reproducible results + img_ids = sorted(coco_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = coco_api.loadImgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... + # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'iscrowd': 0, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] + anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] + total_num_valid_anns = sum([len(x) for x in anns]) + total_num_anns = len(coco_api.anns) + if total_num_valid_anns < total_num_anns: + logger.warning( + f"{json_file} contains {total_num_anns} annotations, but only " + f"{total_num_valid_anns} of them match to images in the file." + ) + + if "minival" not in json_file: + # The popular valminusminival & minival annotations for COCO2014 contain this bug. + # However the ratio of buggy annotations there is tiny and does not affect accuracy. + # Therefore we explicitly white-list them. 
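+        # Descriptive note (added): the check below only asserts that annotation ids
+        # are unique across the whole json; duplicates usually indicate a corrupted or
+        # hand-merged annotation file.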
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) + + dataset_dicts = [] + + ann_keys = ["iscrowd", "bbox", "keypoints", "category_id", "rec"] + (extra_annotation_keys or []) + + num_instances_without_valid_segmentation = 0 + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = os.path.join(image_root, img_dict["file_name"]) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + + # The original COCO valminusminival2014 & minival2014 annotation files + # actually contains bugs that, together with certain ways of using COCO API, + # can trigger this assertion. + assert anno["image_id"] == image_id + + assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' + + obj = {key: anno[key] for key in ann_keys if key in anno} + segm = anno.get("segmentation", None) + if segm: # either list[list[float]] or dict(RLE) + if isinstance(segm, dict): + if isinstance(segm["counts"], list): + # convert to compressed RLE + segm = mask_util.frPyObjects(segm, *segm["size"]) + else: + # filter out invalid polygons (< 3 points) + segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + if len(segm) == 0: + num_instances_without_valid_segmentation += 1 + continue # ignore this instance + obj["segmentation"] = segm + keypts = anno.get("keypoints", None) + if keypts: # list[int] + for idx, v in enumerate(keypts): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # Therefore we assume the coordinates are "pixel indices" and + # add 0.5 to convert to floating point coordinates. + keypts[idx] = v + 0.5 + obj["keypoints"] = keypts + + obj["bbox_mode"] = BoxMode.XYWH_ABS + if id_map: + annotation_category_id = obj["category_id"] + try: + obj["category_id"] = id_map[annotation_category_id] + except KeyError as e: + raise KeyError( + f"Encountered category_id={annotation_category_id} " + "but this id does not exist in 'categories' of the json file." + ) from e + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + if num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. ".format( + num_instances_without_valid_segmentation + ) + + "There might be issues in your dataset generation process. " + "A valid polygon should be a list[float] with even length >= 6." + ) + return dataset_dicts + + +def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): + """ + Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are + treated as ground truth annotations and all files under "image_root" with "image_ext" extension + as input images. Ground truth and input images are matched using file paths relative to + "gt_root" and "image_root" respectively without taking into account file extensions. + This works for COCO as well as some other datasets. 
+ + Args: + gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation + annotations are stored as images with integer values in pixels that represent + corresponding semantic labels. + image_root (str): the directory where the input images are. + gt_ext (str): file extension for ground truth annotations. + image_ext (str): file extension for input images. + + Returns: + list[dict]: + a list of dicts in detectron2 standard format without instance-level + annotation. + + Notes: + 1. This function does not read the image and ground truth files. + The results do not have the "image" and "sem_seg" fields. + """ + + # We match input images with ground truth based on their relative filepaths (without file + # extensions) starting from 'image_root' and 'gt_root' respectively. + def file2id(folder_path, file_path): + # extract relative path starting from `folder_path` + image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) + # remove file extension + image_id = os.path.splitext(image_id)[0] + return image_id + + input_files = sorted( + (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), + key=lambda file_path: file2id(image_root, file_path), + ) + gt_files = sorted( + (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), + key=lambda file_path: file2id(gt_root, file_path), + ) + + assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) + + # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images + if len(input_files) != len(gt_files): + logger.warn( + "Directory {} and {} has {} and {} files, respectively.".format( + image_root, gt_root, len(input_files), len(gt_files) + ) + ) + input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] + gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] + intersect = list(set(input_basenames) & set(gt_basenames)) + # sort, otherwise each worker may obtain a list[dict] in different order + intersect = sorted(intersect) + logger.warn("Will use their intersection of {} files.".format(len(intersect))) + input_files = [os.path.join(image_root, f + image_ext) for f in intersect] + gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] + + logger.info( + "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root) + ) + + dataset_dicts = [] + for (img_path, gt_path) in zip(input_files, gt_files): + record = {} + record["file_name"] = img_path + record["sem_seg_file_name"] = gt_path + dataset_dicts.append(record) + + return dataset_dicts + + +def convert_to_coco_dict(dataset_name): + """ + Convert an instance detection/segmentation or keypoint detection dataset + in detectron2's standard format into COCO json format. + + Generic dataset description can be found here: + https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset + + COCO data format description can be found here: + http://cocodataset.org/#format-data + + Args: + dataset_name (str): + name of the source dataset + Must be registered in DatastCatalog and in detectron2's standard format. 
+ Must have corresponding metadata "thing_classes" + Returns: + coco_dict: serializable dict in COCO json format + """ + + dataset_dicts = DatasetCatalog.get(dataset_name) + metadata = MetadataCatalog.get(dataset_name) + + # unmap the category mapping ids for COCO + if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()} + reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa + else: + reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa + + categories = [ + {"id": reverse_id_mapper(id), "name": name} + for id, name in enumerate(metadata.thing_classes) + ] + + logger.info("Converting dataset dicts into COCO format") + coco_images = [] + coco_annotations = [] + + for image_id, image_dict in enumerate(dataset_dicts): + coco_image = { + "id": image_dict.get("image_id", image_id), + "width": int(image_dict["width"]), + "height": int(image_dict["height"]), + "file_name": str(image_dict["file_name"]), + } + coco_images.append(coco_image) + + anns_per_image = image_dict.get("annotations", []) + for annotation in anns_per_image: + # create a new dict with only COCO fields + coco_annotation = {} + + # COCO requirement: XYWH box format for axis-align and XYWHA for rotated + bbox = annotation["bbox"] + if isinstance(bbox, np.ndarray): + if bbox.ndim != 1: + raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.") + bbox = bbox.tolist() + if len(bbox) not in [4, 5]: + raise ValueError(f"bbox has to has length 4 or 5. Got {bbox}.") + from_bbox_mode = annotation["bbox_mode"] + to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS + bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode) + + # COCO requirement: instance area + if "segmentation" in annotation: + # Computing areas for instances by counting the pixels + segmentation = annotation["segmentation"] + # TODO: check segmentation type: RLE, BinaryMask or Polygon + if isinstance(segmentation, list): + polygons = PolygonMasks([segmentation]) + area = polygons.area()[0].item() + elif isinstance(segmentation, dict): # RLE + area = mask_util.area(segmentation).item() + else: + raise TypeError(f"Unknown segmentation type {type(segmentation)}!") + else: + # Computing areas using bounding boxes + if to_bbox_mode == BoxMode.XYWH_ABS: + bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS) + area = Boxes([bbox_xy]).area()[0].item() + else: + area = RotatedBoxes([bbox]).area()[0].item() + + if "keypoints" in annotation: + keypoints = annotation["keypoints"] # list[int] + for idx, v in enumerate(keypoints): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # For COCO format consistency we substract 0.5 + # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 + keypoints[idx] = v - 0.5 + if "num_keypoints" in annotation: + num_keypoints = annotation["num_keypoints"] + else: + num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) + + # COCO requirement: + # linking annotations to images + # "id" field must start with 1 + coco_annotation["id"] = len(coco_annotations) + 1 + coco_annotation["image_id"] = coco_image["id"] + coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] + coco_annotation["area"] = float(area) + coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0)) + coco_annotation["category_id"] = 
int(reverse_id_mapper(annotation["category_id"])) + + # Add optional fields + if "keypoints" in annotation: + coco_annotation["keypoints"] = keypoints + coco_annotation["num_keypoints"] = num_keypoints + + if "segmentation" in annotation: + seg = coco_annotation["segmentation"] = annotation["segmentation"] + if isinstance(seg, dict): # RLE + counts = seg["counts"] + if not isinstance(counts, str): + # make it json-serializable + seg["counts"] = counts.decode("ascii") + + coco_annotations.append(coco_annotation) + + logger.info( + "Conversion finished, " + f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}" + ) + + info = { + "date_created": str(datetime.datetime.now()), + "description": "Automatically generated COCO json file for Detectron2.", + } + coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None} + if len(coco_annotations) > 0: + coco_dict["annotations"] = coco_annotations + return coco_dict + + +def convert_to_coco_json(dataset_name, output_file, allow_cached=True): + """ + Converts dataset into COCO format and saves it to a json file. + dataset_name must be registered in DatasetCatalog and in detectron2's standard format. + + Args: + dataset_name: + reference from the config file to the catalogs + must be registered in DatasetCatalog and in detectron2's standard format + output_file: path of json file that will be saved to + allow_cached: if json file is already present then skip conversion + """ + + # TODO: The dataset or the conversion script *may* change, + # a checksum would be useful for validating the cached data + + PathManager.mkdirs(os.path.dirname(output_file)) + with file_lock(output_file): + if PathManager.exists(output_file) and allow_cached: + logger.warning( + f"Using previously cached COCO format annotations at '{output_file}'. " + "You need to clear the cache file if your dataset has been modified." + ) + else: + logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)") + coco_dict = convert_to_coco_dict(dataset_name) + + logger.info(f"Caching COCO format annotations at '{output_file}' ...") + tmp_file = output_file + ".tmp" + with PathManager.open(tmp_file, "w") as f: + json.dump(coco_dict, f) + shutil.move(tmp_file, output_file) + + +def register_coco_instances(name, metadata, json_file, image_root): + """ + Register a dataset in COCO's json annotation format for + instance detection, instance segmentation and keypoint detection. + (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. + `instances*.json` and `person_keypoints*.json` in the dataset). + + This is an example of how to register a new dataset. + You can do something similar to this function, to register new datasets. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + metadata (dict): extra metadata associated with this dataset. You can + leave it as an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + assert isinstance(name, str), name + assert isinstance(json_file, (str, os.PathLike)), json_file + assert isinstance(image_root, (str, os.PathLike)), image_root + # 1. register a function which returns dicts + DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name)) + + # 2. 
Optionally, add metadata about this dataset, + # since they might be useful in evaluation, visualization or logging + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata + ) + + +if __name__ == "__main__": + """ + Test the COCO json dataset loader. + + Usage: + python -m detectron2.data.datasets.coco \ + path/to/json path/to/image_root dataset_name + + "dataset_name" can be "coco_2014_minival_100", or other + pre-registered ones + """ + from detectron2.utils.logger import setup_logger + from detectron2.utils.visualizer import Visualizer + import detectron2.data.datasets # noqa # add pre-defined metadata + import sys + + logger = setup_logger(name=__name__) + assert sys.argv[3] in DatasetCatalog.list() + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "coco-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/coco_panoptic.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/coco_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..b8dae44317b556610d7fed39017e082d7e855956 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/coco_panoptic.py @@ -0,0 +1,228 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager + +from .coco import load_coco_json, load_sem_seg + +__all__ = ["register_coco_panoptic", "register_coco_panoptic_separated"] + + +def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = int(ann["image_id"]) + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. 
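+        # Illustrative example (assumed standard COCO panoptic naming): an annotation
+        # whose file_name is "000000000139.png" is paired with the RGB image
+        # "<image_dir>/000000000139.jpg" by the join below.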
+ image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + return ret + + +def register_coco_panoptic( + name, metadata, image_root, panoptic_root, panoptic_json, instances_json=None +): + """ + Register a "standard" version of COCO panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + + Args: + name (str): the name that identifies a dataset, + e.g. "coco_2017_train_panoptic" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. + instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, metadata), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="coco_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +def register_coco_panoptic_separated( + name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json +): + """ + Register a "separated" version of COCO panoptic segmentation dataset named `name`. + The annotations in this registered dataset will contain both instance annotations and + semantic annotations, each with its own contiguous ids. Hence it's called "separated". + + It follows the setting used by the PanopticFPN paper: + + 1. The instance annotations directly come from polygons in the COCO + instances annotation task, rather than from the masks in the COCO panoptic annotations. + + The two format have small differences: + Polygons in the instance annotations may have overlaps. + The mask annotations are produced by labeling the overlapped polygons + with depth ordering. + + 2. The semantic annotations are converted from panoptic annotations, where + all "things" are assigned a semantic id of 0. + All semantic categories will therefore have ids in contiguous + range [1, #stuff_categories]. + + This function will also register a pure semantic segmentation dataset + named ``name + '_stuffonly'``. + + Args: + name (str): the name that identifies a dataset, + e.g. "coco_2017_train_panoptic" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images + panoptic_json (str): path to the json panoptic annotation file + sem_seg_root (str): directory which contains all the ground truth segmentation annotations. 
+ instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + "_separated" + DatasetCatalog.register( + panoptic_name, + lambda: merge_to_panoptic( + load_coco_json(instances_json, image_root, panoptic_name), + load_sem_seg(sem_seg_root, image_root), + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + sem_seg_root=sem_seg_root, + json_file=instances_json, # TODO rename + evaluator_type="coco_panoptic_seg", + ignore_label=255, + **metadata, + ) + + semantic_name = name + "_stuffonly" + DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root)) + MetadataCatalog.get(semantic_name).set( + sem_seg_root=sem_seg_root, + image_root=image_root, + evaluator_type="sem_seg", + ignore_label=255, + **metadata, + ) + + +def merge_to_panoptic(detection_dicts, sem_seg_dicts): + """ + Create dataset dicts for panoptic segmentation, by + merging two dicts using "file_name" field to match their entries. + + Args: + detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. + sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. + + Returns: + list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in + both detection_dicts and sem_seg_dicts that correspond to the same image. + The function assumes that the same key in different dicts has the same value. + """ + results = [] + sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} + assert len(sem_seg_file_to_entry) > 0 + + for det_dict in detection_dicts: + dic = copy.copy(det_dict) + dic.update(sem_seg_file_to_entry[dic["file_name"]]) + results.append(dic) + return results + + +if __name__ == "__main__": + """ + Test the COCO panoptic dataset loader. + + Usage: + python -m detectron2.data.datasets.coco_panoptic \ + path/to/image_root path/to/panoptic_root path/to/panoptic_json dataset_name 10 + + "dataset_name" can be "coco_2017_train_panoptic", or other + pre-registered ones + """ + from detectron2.utils.logger import setup_logger + from detectron2.utils.visualizer import Visualizer + import detectron2.data.datasets # noqa # add pre-defined metadata + import sys + from PIL import Image + import numpy as np + + logger = setup_logger(name=__name__) + assert sys.argv[4] in DatasetCatalog.list() + meta = MetadataCatalog.get(sys.argv[4]) + + dicts = load_coco_panoptic_json(sys.argv[3], sys.argv[1], sys.argv[2], meta.as_dict()) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "coco-data-vis" + os.makedirs(dirname, exist_ok=True) + num_imgs_to_vis = int(sys.argv[5]) + for i, d in enumerate(dicts): + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) + if i + 1 >= num_imgs_to_vis: + break diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis.py new file mode 100644 index 0000000000000000000000000000000000000000..e663feb00a69c6763f09e731a828dd35161e6d3a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis.py @@ -0,0 +1,228 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
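+# Usage sketch (added for illustration; the dataset name and paths are placeholders):
+#   register_lvis_instances("my_lvis_train", {}, "path/to/lvis_train.json", "datasets/coco/")
+# After this call, DatasetCatalog.get("my_lvis_train") lazily runs load_lvis_json, and
+# MetadataCatalog.get("my_lvis_train") carries json_file, image_root and the "lvis"
+# evaluator type. Note that image_root is string-concatenated with the split folder
+# parsed from each image's coco_url, so a trailing slash (e.g. "datasets/coco/") is expected.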
+import logging +import os +from fvcore.common.timer import Timer + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from detectron2.utils.file_io import PathManager + +from .builtin_meta import _get_coco_instances_meta +from .lvis_v0_5_categories import LVIS_CATEGORIES as LVIS_V0_5_CATEGORIES +from .lvis_v1_categories import LVIS_CATEGORIES as LVIS_V1_CATEGORIES + +""" +This file contains functions to parse LVIS-format annotations into dicts in the +"Detectron2 format". +""" + +logger = logging.getLogger(__name__) + +__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"] + + +def register_lvis_instances(name, metadata, json_file, image_root): + """ + Register a dataset in LVIS's json annotation format for instance detection and segmentation. + + Args: + name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train". + metadata (dict): extra metadata associated with this dataset. It can be an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name)) + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata + ) + + +def load_lvis_json(json_file, image_root, dataset_name=None): + """ + Load a json file in LVIS's annotation format. + + Args: + json_file (str): full path to the LVIS json annotation file. + image_root (str): the directory where the images in this json file exists. + dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train"). + If provided, this function will put "thing_classes" into the metadata + associated with this dataset. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from lvis import LVIS + + json_file = PathManager.get_local_path(json_file) + + timer = Timer() + lvis_api = LVIS(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + if dataset_name is not None: + meta = get_lvis_instances_meta(dataset_name) + MetadataCatalog.get(dataset_name).set(**meta) + + # sort indices for reproducible results + img_ids = sorted(lvis_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = lvis_api.load_imgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... + # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] 
+ anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids] + + # Sanity check that each annotation has a unique id + ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + + logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file)) + + def get_file_name(img_root, img_dict): + # Determine the path including the split folder ("train2017", "val2017", "test2017") from + # the coco_url field. Example: + # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg' + split_folder, file_name = img_dict["coco_url"].split("/")[-2:] + return os.path.join(img_root + split_folder, file_name) + + dataset_dicts = [] + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = get_file_name(image_root, img_dict) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", []) + record["neg_category_ids"] = img_dict.get("neg_category_ids", []) + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + assert anno["image_id"] == image_id + obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS} + # LVIS data loader can be used to load COCO dataset categories. In this case `meta` + # variable will have a field with COCO-specific category mapping. + if dataset_name is not None and "thing_dataset_id_to_contiguous_id" in meta: + obj["category_id"] = meta["thing_dataset_id_to_contiguous_id"][anno["category_id"]] + else: + obj["category_id"] = anno["category_id"] - 1 # Convert 1-indexed to 0-indexed + segm = anno["segmentation"] # list[list[float]] + # filter out invalid polygons (< 3 points) + valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + assert len(segm) == len( + valid_segm + ), "Annotation contains an invalid polygon with < 3 points" + assert len(segm) > 0 + obj["segmentation"] = segm + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + return dataset_dicts + + +def get_lvis_instances_meta(dataset_name): + """ + Load LVIS metadata. + + Args: + dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5"). 
+ + Returns: + dict: LVIS metadata with keys: thing_classes + """ + if "cocofied" in dataset_name: + return _get_coco_instances_meta() + if "v0.5" in dataset_name: + return _get_lvis_instances_meta_v0_5() + elif "v1" in dataset_name: + return _get_lvis_instances_meta_v1() + raise ValueError("No built-in metadata for dataset {}".format(dataset_name)) + + +def _get_lvis_instances_meta_v0_5(): + assert len(LVIS_V0_5_CATEGORIES) == 1230 + cat_ids = [k["id"] for k in LVIS_V0_5_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V0_5_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes} + return meta + + +def _get_lvis_instances_meta_v1(): + assert len(LVIS_V1_CATEGORIES) == 1203 + cat_ids = [k["id"] for k in LVIS_V1_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V1_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes} + return meta + + +if __name__ == "__main__": + """ + Test the LVIS json dataset loader. + + Usage: + python -m detectron2.data.datasets.lvis \ + path/to/json path/to/image_root dataset_name vis_limit + """ + import sys + import numpy as np + from detectron2.utils.logger import setup_logger + from PIL import Image + import detectron2.data.datasets # noqa # add pre-defined metadata + from detectron2.utils.visualizer import Visualizer + + logger = setup_logger(name=__name__) + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "lvis-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts[: int(sys.argv[4])]: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis_v0_5_categories.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis_v0_5_categories.py new file mode 100644 index 0000000000000000000000000000000000000000..d3dab6198da614937b08682f4c9edf52bdf1d236 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis_v0_5_categories.py @@ -0,0 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
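+# Each entry in LVIS_CATEGORIES below is a dict with keys 'frequency' ('r'/'c'/'f'),
+# 'id' (1-indexed), 'synset', 'synonyms', 'def' and 'name'. A short sketch (illustrative
+# only) of how get_lvis_instances_meta in lvis.py derives thing_classes from this list:
+#
+#   from detectron2.data.datasets.lvis_v0_5_categories import LVIS_CATEGORIES
+#   cats = sorted(LVIS_CATEGORIES, key=lambda x: x["id"])     # ensure id order
+#   thing_classes = [c["synonyms"][0] for c in cats]          # first synonym is the class name
+#   assert len(thing_classes) == 1230                         # LVIS v0.5 category count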
+# Autogen with +# with open("lvis_v0.5_val.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["image_count"] +# del x["instance_count"] +# LVIS_CATEGORIES = repr(c) + " # noqa" + +# fmt: off +LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20, 
'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 
'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a 
bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to 
sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 
'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 
'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 
'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 
'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 
'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 
'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 
'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, 
{'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small 
writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 
'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 
'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 
'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 
'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for 
holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 
'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 
'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 
'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 
'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 
'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 
'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by 
sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 
'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 
'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing 
relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 
'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': 
['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect 
clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant 
commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 
'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a target', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharp teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa +# fmt: on diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis_v1_categories.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis_v1_categories.py new file mode 100644 index 0000000000000000000000000000000000000000..7374e6968bb006f5d8c49e75d9d3b31ea3d77d05 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/lvis_v1_categories.py @@ -0,0 +1,16 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# Autogen with +# with open("lvis_v1_val.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["image_count"] +# del x["instance_count"] +# LVIS_CATEGORIES = repr(c) + " # noqa" +# with open("/tmp/lvis_categories.py", "wt") as f: +# f.write(f"LVIS_CATEGORIES = {LVIS_CATEGORIES}") +# Then paste the contents of that file below + +# fmt: off +LVIS_CATEGORIES = [{'frequency': 'c', 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'id': 1, 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'id': 2, 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'id': 3, 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'f', 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'id': 4, 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'id': 5, 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'c', 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'id': 6, 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'synset': 'almond.n.02', 'synonyms': ['almond'], 'id': 7, 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'id': 8, 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'c', 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'id': 9, 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'id': 10, 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'id': 11, 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'synset': 'apple.n.01', 'synonyms': ['apple'], 'id': 12, 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'id': 13, 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'id': 14, 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'synset': 'apron.n.01', 'synonyms': ['apron'], 'id': 15, 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'id': 16, 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'r', 'synset': 'arctic.n.02', 'synonyms': ['arctic_(type_of_shoe)', 'galosh', 'golosh', 'rubber_(type_of_shoe)', 'gumshoe'], 'id': 17, 'def': 'a waterproof overshoe that protects shoes from water or snow', 'name': 'arctic_(type_of_shoe)'}, {'frequency': 'c', 'synset': 
'armband.n.02', 'synonyms': ['armband'], 'id': 18, 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'id': 19, 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'id': 20, 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'id': 21, 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'id': 22, 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'id': 23, 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'id': 24, 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'id': 25, 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'id': 26, 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'f', 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'id': 27, 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'id': 28, 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'synset': 'awning.n.01', 'synonyms': ['awning'], 'id': 29, 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'id': 30, 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'r', 'synset': 'baboon.n.01', 'synonyms': ['baboon'], 'id': 31, 'def': 'large terrestrial monkeys having doglike muzzles', 'name': 'baboon'}, {'frequency': 'f', 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'id': 32, 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'id': 33, 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'id': 34, 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'id': 35, 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'id': 36, 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'id': 
37, 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'id': 38, 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'id': 39, 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'id': 40, 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'synset': 'ball.n.06', 'synonyms': ['ball'], 'id': 41, 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'id': 42, 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'id': 43, 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'id': 44, 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'synset': 'banana.n.02', 'synonyms': ['banana'], 'id': 45, 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'c', 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'id': 46, 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'id': 47, 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'f', 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'id': 48, 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'id': 49, 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'id': 50, 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'id': 51, 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'synset': 'barge.n.01', 'synonyms': ['barge'], 'id': 52, 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'id': 53, 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'id': 54, 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'id': 55, 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'id': 56, 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'id': 57, 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 
'f', 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'id': 58, 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'id': 59, 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'id': 60, 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'id': 61, 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'id': 62, 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'id': 63, 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'c', 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'id': 64, 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'id': 65, 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'id': 66, 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'id': 67, 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'id': 68, 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'id': 69, 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'synset': 'battery.n.02', 'synonyms': ['battery'], 'id': 70, 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'id': 71, 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'synset': 'bead.n.01', 'synonyms': ['bead'], 'id': 72, 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'c', 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'id': 73, 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'id': 74, 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'id': 75, 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'synset': 'bear.n.01', 'synonyms': ['bear'], 'id': 76, 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'synset': 'bed.n.01', 'synonyms': ['bed'], 'id': 77, 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'r', 'synset': 'bedpan.n.01', 'synonyms': 
['bedpan'], 'id': 78, 'def': 'a shallow vessel used by a bedridden patient for defecation and urination', 'name': 'bedpan'}, {'frequency': 'f', 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'id': 79, 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'synset': 'beef.n.01', 'synonyms': ['cow'], 'id': 80, 'def': 'cattle/cow', 'name': 'cow'}, {'frequency': 'f', 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'id': 81, 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'id': 82, 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'id': 83, 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'id': 84, 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'id': 85, 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'synset': 'bell.n.01', 'synonyms': ['bell'], 'id': 86, 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'id': 87, 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'synset': 'belt.n.02', 'synonyms': ['belt'], 'id': 88, 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'id': 89, 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'synset': 'bench.n.01', 'synonyms': ['bench'], 'id': 90, 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'synset': 'beret.n.01', 'synonyms': ['beret'], 'id': 91, 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'synset': 'bib.n.02', 'synonyms': ['bib'], 'id': 92, 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'id': 93, 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'id': 94, 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'id': 95, 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'f', 'synset': 'billboard.n.01', 'synonyms': ['billboard'], 'id': 96, 'def': 'large outdoor signboard', 'name': 'billboard'}, {'frequency': 'c', 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'id': 97, 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'id': 98, 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'synset': 'bird.n.01', 'synonyms': ['bird'], 'id': 99, 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'c', 'synset': 
'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'id': 100, 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'c', 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'id': 101, 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'id': 102, 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'id': 103, 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'id': 104, 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'id': 105, 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'id': 106, 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'id': 107, 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'synset': 'blackberry.n.01', 'synonyms': ['blackberry'], 'id': 108, 'def': 'large sweet black or very dark purple edible aggregate fruit', 'name': 'blackberry'}, {'frequency': 'f', 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'id': 109, 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'id': 110, 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'id': 111, 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'id': 112, 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'id': 113, 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'f', 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'id': 114, 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'f', 'synset': 'blouse.n.01', 'synonyms': ['blouse'], 'id': 115, 'def': 'a top worn by women', 'name': 'blouse'}, {'frequency': 'f', 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'id': 116, 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'id': 117, 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'id': 118, 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'r', 'synset': 'bob.n.05', 'synonyms': ['bob', 'bobber', 'bobfloat'], 'id': 119, 'def': 'a small float usually made of cork; attached to a fishing line', 'name': 'bob'}, {'frequency': 'c', 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'id': 120, 'def': 'a thing around which thread/tape/film 
or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'c', 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'id': 121, 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'id': 122, 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'id': 123, 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'id': 124, 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'id': 125, 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'id': 126, 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'synset': 'book.n.01', 'synonyms': ['book'], 'id': 127, 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'c', 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'id': 128, 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'id': 129, 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'id': 130, 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'id': 131, 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'synset': 'boot.n.01', 'synonyms': ['boot'], 'id': 132, 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'id': 133, 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'id': 134, 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'id': 135, 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'id': 136, 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'id': 137, 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'id': 138, 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'id': 139, 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'id': 140, 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 
'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'id': 141, 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'id': 142, 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'f', 'synset': 'box.n.01', 'synonyms': ['box'], 'id': 143, 'def': 'a (usually rectangular) container; may have a lid', 'name': 'box'}, {'frequency': 'r', 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'id': 144, 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'id': 145, 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'id': 146, 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'id': 147, 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'id': 148, 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'id': 149, 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'f', 'synset': 'bread.n.01', 'synonyms': ['bread'], 'id': 150, 'def': 'food made from dough of flour or meal and usually raised with yeast or baking powder and then baked', 'name': 'bread'}, {'frequency': 'r', 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'id': 151, 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'f', 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'id': 152, 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'id': 153, 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'f', 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'id': 154, 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'id': 155, 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'synset': 'broom.n.01', 'synonyms': ['broom'], 'id': 156, 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'id': 157, 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'id': 158, 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'id': 159, 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'id': 160, 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, 
{'frequency': 'r', 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'id': 161, 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'synset': 'bull.n.11', 'synonyms': ['horned_cow'], 'id': 162, 'def': 'a cow with horns', 'name': 'bull'}, {'frequency': 'c', 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'id': 163, 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'id': 164, 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'id': 165, 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'id': 166, 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'id': 167, 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'id': 168, 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'f', 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'id': 169, 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'id': 170, 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'id': 171, 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'id': 172, 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'id': 173, 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'id': 174, 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'f', 'synset': 'butter.n.01', 'synonyms': ['butter'], 'id': 175, 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'id': 176, 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'synset': 'button.n.01', 'synonyms': ['button'], 'id': 177, 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'id': 178, 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'id': 179, 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'c', 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 
'id': 180, 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'id': 181, 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'id': 182, 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'synset': 'cake.n.03', 'synonyms': ['cake'], 'id': 183, 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'id': 184, 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'id': 185, 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'synset': 'calf.n.01', 'synonyms': ['calf'], 'id': 186, 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'id': 187, 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'synset': 'camel.n.01', 'synonyms': ['camel'], 'id': 188, 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'synset': 'camera.n.01', 'synonyms': ['camera'], 'id': 189, 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'id': 190, 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'id': 191, 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'id': 192, 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'id': 193, 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'f', 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'id': 194, 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'id': 195, 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'id': 196, 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'id': 197, 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'id': 198, 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'id': 199, 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'c', 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'id': 200, 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 
'name': 'canoe'}, {'frequency': 'c', 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'id': 201, 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'id': 202, 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'f', 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'id': 203, 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'id': 204, 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'c', 'synset': 'cape.n.02', 'synonyms': ['cape'], 'id': 205, 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'id': 206, 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'id': 207, 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'id': 208, 'def': 'a wheeled vehicle adapted to the rails of railroad (mark each individual railcar separately)', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'id': 209, 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'id': 210, 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'id': 211, 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'synset': 'card.n.03', 'synonyms': ['card'], 'id': 212, 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'c', 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'id': 213, 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'id': 214, 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'id': 215, 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'id': 216, 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'id': 217, 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'f', 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'id': 218, 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'synset': 'cart.n.01', 'synonyms': ['cart'], 'id': 219, 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'synset': 'carton.n.02', 'synonyms': ['carton'], 'id': 220, 'def': 'a container made of cardboard for holding food or drink', 'name': 'carton'}, {'frequency': 'c', 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'id': 221, 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'id': 222, 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'id': 223, 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'id': 224, 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'synset': 'cat.n.01', 'synonyms': ['cat'], 'id': 225, 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'f', 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'id': 226, 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'c', 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'id': 227, 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'id': 228, 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'f', 'synset': 'celery.n.01', 'synonyms': ['celery'], 'id': 229, 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'id': 230, 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'id': 231, 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 
'chain_mail'}, {'frequency': 'f', 'synset': 'chair.n.01', 'synonyms': ['chair'], 'id': 232, 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'id': 233, 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'synset': 'chalice.n.01', 'synonyms': ['chalice'], 'id': 234, 'def': 'a bowl-shaped drinking vessel; especially the Eucharistic cup', 'name': 'chalice'}, {'frequency': 'f', 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'id': 235, 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'synset': 'chap.n.04', 'synonyms': ['chap'], 'id': 236, 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'id': 237, 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'id': 238, 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'id': 239, 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'id': 240, 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'c', 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'id': 241, 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'id': 242, 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'c', 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'id': 243, 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'id': 244, 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'id': 245, 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'id': 246, 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'id': 247, 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'id': 248, 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'id': 249, 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'id': 250, 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'id': 251, 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 
'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'id': 252, 'def': 'shirt collar, animal collar, or tight-fitting necklace', 'name': 'choker'}, {'frequency': 'f', 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'id': 253, 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'f', 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'id': 254, 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'id': 255, 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'synset': 'chute.n.02', 'synonyms': ['slide'], 'id': 256, 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'id': 257, 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'id': 258, 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'f', 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'id': 259, 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'id': 260, 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'id': 261, 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'id': 262, 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'c', 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'id': 263, 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'id': 264, 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'synset': 'cleat.n.02', 'synonyms': ['cleat_(for_securing_rope)'], 'id': 265, 'def': 'a fastener (usually with two projecting horns) around which a rope can be secured', 'name': 'cleat_(for_securing_rope)'}, {'frequency': 'r', 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'id': 266, 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'synset': 'clip.n.03', 'synonyms': ['clip'], 'id': 267, 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'id': 268, 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'r', 'synset': 'clipper.n.03', 'synonyms': ['clippers_(for_plants)'], 'id': 269, 'def': 'shears for cutting grass or shrubbery (often used in the plural)', 'name': 'clippers_(for_plants)'}, {'frequency': 'r', 'synset': 'cloak.n.02', 'synonyms': ['cloak'], 'id': 270, 'def': 'a loose outer garment', 'name': 'cloak'}, {'frequency': 'f', 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'id': 271, 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'synset': 
'clock_tower.n.01', 'synonyms': ['clock_tower'], 'id': 272, 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'id': 273, 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'id': 274, 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'id': 275, 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'id': 276, 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'synset': 'coat.n.01', 'synonyms': ['coat'], 'id': 277, 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'id': 278, 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'c', 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'id': 279, 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'id': 280, 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'r', 'synset': 'cockroach.n.01', 'synonyms': ['cockroach'], 'id': 281, 'def': 'any of numerous chiefly nocturnal insects; some are domestic pests', 'name': 'cockroach'}, {'frequency': 'r', 'synset': 'cocoa.n.01', 'synonyms': ['cocoa_(beverage)', 'hot_chocolate_(beverage)', 'drinking_chocolate'], 'id': 282, 'def': 'a beverage made from cocoa powder and milk and sugar; usually drunk hot', 'name': 'cocoa_(beverage)'}, {'frequency': 'c', 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'id': 283, 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'f', 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'id': 284, 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'id': 285, 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'id': 286, 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'synset': 'coil.n.05', 'synonyms': ['coil'], 'id': 287, 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'synset': 'coin.n.01', 'synonyms': ['coin'], 'id': 288, 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'c', 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'id': 289, 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'id': 290, 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'id': 291, 
'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'id': 292, 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'id': 293, 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'id': 294, 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'r', 'synset': 'compass.n.01', 'synonyms': ['compass'], 'id': 295, 'def': 'navigational instrument for finding directions', 'name': 'compass'}, {'frequency': 'f', 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'id': 296, 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'f', 'synset': 'condiment.n.01', 'synonyms': ['condiment'], 'id': 297, 'def': 'a preparation (a sauce or relish or spice) to enhance flavor or enjoyment', 'name': 'condiment'}, {'frequency': 'f', 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'id': 298, 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'id': 299, 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'id': 300, 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'id': 301, 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'r', 'synset': 'cooker.n.01', 'synonyms': ['cooker'], 'id': 302, 'def': 'a utensil for cooking', 'name': 'cooker'}, {'frequency': 'f', 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'id': 303, 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'id': 304, 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'id': 305, 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'f', 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'id': 306, 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'id': 307, 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'c', 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'id': 308, 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'f', 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'id': 309, 'def': 'ears or kernels of corn that can be prepared and served for human food (only mark individual ears or kernels)', 'name': 'edible_corn'}, {'frequency': 'r', 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'id': 310, 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 
'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'id': 311, 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'id': 312, 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'id': 313, 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'c', 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'id': 314, 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'c', 'synset': 'costume.n.04', 'synonyms': ['costume'], 'id': 315, 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'id': 316, 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'id': 317, 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'c', 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'id': 318, 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'id': 319, 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'c', 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'id': 320, 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'r', 'synset': 'crab.n.05', 'synonyms': ['crabmeat'], 'id': 321, 'def': 'the edible flesh of any of various crabs', 'name': 'crabmeat'}, {'frequency': 'c', 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'id': 322, 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'id': 323, 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'synset': 'crate.n.01', 'synonyms': ['crate'], 'id': 324, 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'c', 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'id': 325, 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'id': 326, 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'c', 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'id': 327, 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'id': 328, 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'id': 329, 'def': 'an earthen jar (made of baked clay) or a modern electric crockpot', 'name': 'crock_pot'}, {'frequency': 'f', 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'id': 330, 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'synset': 'crouton.n.01', 'synonyms': 
['crouton'], 'id': 331, 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'c', 'synset': 'crow.n.01', 'synonyms': ['crow'], 'id': 332, 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'r', 'synset': 'crowbar.n.01', 'synonyms': ['crowbar', 'wrecking_bar', 'pry_bar'], 'id': 333, 'def': 'a heavy iron lever with one end forged into a wedge', 'name': 'crowbar'}, {'frequency': 'c', 'synset': 'crown.n.04', 'synonyms': ['crown'], 'id': 334, 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'id': 335, 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'id': 336, 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'id': 337, 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'f', 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'id': 338, 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'c', 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'id': 339, 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'id': 340, 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'c', 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'id': 341, 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'id': 342, 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'id': 343, 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'synset': 'cup.n.01', 'synonyms': ['cup'], 'id': 344, 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'id': 345, 'def': 'a metal award or cup-shaped vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'f', 'synset': 'cupboard.n.01', 'synonyms': ['cupboard', 'closet'], 'id': 346, 'def': 'a small room (or recess) or cabinet used for storage space', 'name': 'cupboard'}, {'frequency': 'f', 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'id': 347, 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'id': 348, 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'id': 349, 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'id': 350, 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, 
{'frequency': 'f', 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'id': 351, 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'id': 352, 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'id': 353, 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'id': 354, 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'synset': 'dalmatian.n.02', 'synonyms': ['dalmatian'], 'id': 355, 'def': 'a large breed having a smooth white coat with black or brown spots', 'name': 'dalmatian'}, {'frequency': 'c', 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'id': 356, 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'id': 357, 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'id': 358, 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'id': 359, 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'id': 360, 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'synset': 'desk.n.01', 'synonyms': ['desk'], 'id': 361, 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'id': 362, 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'id': 363, 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'id': 364, 'def': 'yearly planner book', 'name': 'diary'}, {'frequency': 'r', 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'id': 365, 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'id': 366, 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'id': 367, 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'id': 368, 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'f', 'synset': 'dish.n.01', 'synonyms': ['dish'], 'id': 369, 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'id': 370, 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, 
{'frequency': 'c', 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'id': 371, 'def': 'a cloth for washing dishes or cleaning in general', 'name': 'dishrag'}, {'frequency': 'f', 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'id': 372, 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'id': 373, 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid', 'dishsoap'], 'id': 374, 'def': 'dishsoap or dish detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'f', 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'id': 375, 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'r', 'synset': 'diving_board.n.01', 'synonyms': ['diving_board'], 'id': 376, 'def': 'a springboard from which swimmers can dive', 'name': 'diving_board'}, {'frequency': 'f', 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'id': 377, 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'synset': 'dog.n.01', 'synonyms': ['dog'], 'id': 378, 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'id': 379, 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'f', 'synset': 'doll.n.01', 'synonyms': ['doll'], 'id': 380, 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'id': 381, 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'synset': 'dollhouse.n.01', 'synonyms': ['dollhouse', "doll's_house"], 'id': 382, 'def': "a house so small that it is likened to a child's plaything", 'name': 'dollhouse'}, {'frequency': 'c', 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'id': 383, 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'id': 384, 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'f', 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'id': 385, 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'id': 386, 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'id': 387, 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'synset': 'dove.n.01', 'synonyms': ['dove'], 'id': 388, 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'id': 389, 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'id': 390, 'def': 'a boxlike container in a piece of furniture; made so as to slide in and 
out', 'name': 'drawer'}, {'frequency': 'c', 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'id': 391, 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'id': 392, 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'id': 393, 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'f', 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'id': 394, 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'f', 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'id': 395, 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'synset': 'drill.n.01', 'synonyms': ['drill'], 'id': 396, 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'synset': 'drone.n.04', 'synonyms': ['drone'], 'id': 397, 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'id': 398, 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'id': 399, 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'id': 400, 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'synset': 'duck.n.01', 'synonyms': ['duck'], 'id': 401, 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'c', 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'id': 402, 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'id': 403, 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'id': 404, 'def': 'a large cylindrical bag of heavy cloth (does not include suitcases)', 'name': 'duffel_bag'}, {'frequency': 'r', 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'id': 405, 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'id': 406, 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'id': 407, 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'c', 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'id': 408, 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'id': 409, 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'id': 410, 'def': 'a 
soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'synset': 'earring.n.01', 'synonyms': ['earring'], 'id': 411, 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'synset': 'easel.n.01', 'synonyms': ['easel'], 'id': 412, 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'id': 413, 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'synset': 'eel.n.01', 'synonyms': ['eel'], 'id': 414, 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'id': 415, 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'id': 416, 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'id': 417, 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'id': 418, 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'id': 419, 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'id': 420, 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'id': 421, 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'id': 422, 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'c', 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'id': 423, 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'id': 424, 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'id': 425, 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'id': 426, 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'id': 427, 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'id': 428, 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'synset': 'fan.n.01', 'synonyms': ['fan'], 'id': 429, 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'id': 430, 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'id': 
431, 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'id': 432, 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'id': 433, 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'c', 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'id': 434, 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'id': 435, 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'id': 436, 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'id': 437, 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'id': 438, 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'id': 439, 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'id': 440, 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'f', 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'id': 441, 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'f', 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'id': 442, 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'id': 443, 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'id': 444, 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'id': 445, 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'r', 'synset': 'first-aid_kit.n.01', 'synonyms': ['first-aid_kit'], 'id': 446, 'def': 'kit consisting of a set of bandages and medicines for giving first aid', 'name': 'first-aid_kit'}, {'frequency': 'f', 'synset': 'fish.n.01', 'synonyms': ['fish'], 'id': 447, 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'c', 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'id': 448, 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'id': 449, 'def': 'a 
transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'c', 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'id': 450, 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'synset': 'flag.n.01', 'synonyms': ['flag'], 'id': 451, 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'id': 452, 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'id': 453, 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'id': 454, 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'c', 'synset': 'flap.n.01', 'synonyms': ['flap'], 'id': 455, 'def': 'any broad thin covering attached at one edge, such as a mud flap next to a wheel or a flap on an airplane wing', 'name': 'flap'}, {'frequency': 'r', 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'id': 456, 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'id': 457, 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'id': 458, 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'id': 459, 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'id': 460, 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'id': 461, 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'id': 462, 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'c', 'synset': 'foal.n.01', 'synonyms': ['foal'], 'id': 463, 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'id': 464, 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'id': 465, 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'id': 466, 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'id': 467, 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'id': 468, 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'synset': 
'fork.n.01', 'synonyms': ['fork'], 'id': 469, 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'c', 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'id': 470, 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'c', 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'id': 471, 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'c', 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'id': 472, 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'id': 473, 'def': 'anything that freshens air by removing or covering odor', 'name': 'freshener'}, {'frequency': 'f', 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'id': 474, 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'id': 475, 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'id': 476, 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'f', 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'id': 477, 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'id': 478, 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'id': 479, 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'r', 'synset': 'futon.n.01', 'synonyms': ['futon'], 'id': 480, 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'id': 481, 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'id': 482, 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'id': 483, 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'id': 484, 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'id': 485, 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'id': 486, 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'id': 487, 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'id': 488, 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'c', 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'id': 489, 'def': 'small swift graceful antelope of 
Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'id': 490, 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'id': 491, 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'r', 'synset': 'generator.n.02', 'synonyms': ['generator'], 'id': 492, 'def': 'engine that converts mechanical energy into electrical energy by electromagnetic induction', 'name': 'generator'}, {'frequency': 'c', 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'id': 493, 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'id': 494, 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'id': 495, 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'id': 496, 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'id': 497, 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'id': 498, 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'synset': 'globe.n.03', 'synonyms': ['globe'], 'id': 499, 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'synset': 'glove.n.02', 'synonyms': ['glove'], 'id': 500, 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'synset': 'goat.n.01', 'synonyms': ['goat'], 'id': 501, 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'id': 502, 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'id': 503, 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'c', 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'id': 504, 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'id': 505, 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'id': 506, 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'synset': 'goose.n.01', 'synonyms': ['goose'], 'id': 507, 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'id': 508, 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 
'id': 509, 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'f', 'synset': 'grape.n.01', 'synonyms': ['grape'], 'id': 510, 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'c', 'synset': 'grater.n.01', 'synonyms': ['grater'], 'id': 511, 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'id': 512, 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'id': 513, 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'f', 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'id': 514, 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'f', 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'id': 515, 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'id': 516, 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'f', 'synset': 'grill.n.02', 'synonyms': ['grill', 'grille', 'grillwork', 'radiator_grille'], 'id': 517, 'def': 'a framework of metal bars used as a partition or a grate', 'name': 'grill'}, {'frequency': 'r', 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'id': 518, 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'id': 519, 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'id': 520, 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'f', 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'id': 521, 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'id': 522, 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'synset': 'gun.n.01', 'synonyms': ['gun'], 'id': 523, 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'f', 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'id': 524, 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'id': 525, 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'id': 526, 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'r', 'synset': 'halter.n.03', 'synonyms': ['halter_top'], 'id': 527, 'def': "a woman's top that fastens behind the back and neck leaving the back and arms uncovered", 'name': 'halter_top'}, {'frequency': 'f', 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'id': 528, 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 
'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'id': 529, 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'id': 530, 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'c', 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'id': 531, 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'id': 532, 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'c', 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'id': 533, 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'f', 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'id': 534, 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'id': 535, 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'id': 536, 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'id': 537, 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'id': 538, 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'id': 539, 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'id': 540, 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'id': 541, 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'id': 542, 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'id': 543, 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'synset': 'hat.n.01', 'synonyms': ['hat'], 'id': 544, 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'id': 545, 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'c', 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'id': 546, 'def': 'a garment that covers the head OR face', 'name': 'veil'}, {'frequency': 'f', 'synset': 'headband.n.01', 'synonyms': ['headband'], 'id': 547, 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'id': 548, 'def': 'a 
vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'id': 549, 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'id': 550, 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'synset': 'headset.n.01', 'synonyms': ['headset'], 'id': 551, 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'id': 552, 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'c', 'synset': 'heart.n.02', 'synonyms': ['heart'], 'id': 553, 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'id': 554, 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'id': 555, 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'id': 556, 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'synset': 'heron.n.02', 'synonyms': ['heron'], 'id': 557, 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'id': 558, 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'id': 559, 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'id': 560, 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'id': 561, 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'id': 562, 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'id': 563, 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'synset': 'honey.n.01', 'synonyms': ['honey'], 'id': 564, 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'id': 565, 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'synset': 'hook.n.05', 'synonyms': ['hook'], 'id': 566, 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'r', 'synset': 'hookah.n.01', 'synonyms': ['hookah', 'narghile', 'nargileh', 'sheesha', 'shisha', 'water_pipe'], 'id': 567, 
'def': 'a tobacco pipe with a long flexible tube connected to a container where the smoke is cooled by passing through water', 'name': 'hookah'}, {'frequency': 'r', 'synset': 'hornet.n.01', 'synonyms': ['hornet'], 'id': 568, 'def': 'large stinging wasp', 'name': 'hornet'}, {'frequency': 'f', 'synset': 'horse.n.01', 'synonyms': ['horse'], 'id': 569, 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'id': 570, 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'id': 571, 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'id': 572, 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'id': 573, 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'id': 574, 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'id': 575, 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'c', 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'id': 576, 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'id': 577, 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'f', 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'id': 578, 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'id': 579, 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'id': 580, 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'id': 581, 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'id': 582, 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'id': 583, 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'c', 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'id': 584, 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'id': 585, 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'f', 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'id': 586, 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 
'smoothing_iron_(for_clothing)'], 'id': 587, 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'c', 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'id': 588, 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'id': 589, 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'c', 'synset': 'jam.n.01', 'synonyms': ['jam'], 'id': 590, 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'synset': 'jar.n.01', 'synonyms': ['jar'], 'id': 591, 'def': 'a vessel (usually cylindrical) with a wide mouth and without handles', 'name': 'jar'}, {'frequency': 'f', 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'id': 592, 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'id': 593, 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'id': 594, 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'id': 595, 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'id': 596, 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'r', 'synset': 'jewel.n.01', 'synonyms': ['jewel', 'gem', 'precious_stone'], 'id': 597, 'def': 'a precious or semiprecious stone incorporated into a piece of jewelry', 'name': 'jewel'}, {'frequency': 'c', 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'id': 598, 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'id': 599, 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'c', 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'id': 600, 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'id': 601, 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'synset': 'keg.n.02', 'synonyms': ['keg'], 'id': 602, 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'id': 603, 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'id': 604, 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'synset': 'key.n.01', 'synonyms': ['key'], 'id': 605, 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'id': 606, 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'c', 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'id': 607, 'def': 'a knee-length pleated tartan 
skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'id': 608, 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'id': 609, 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'r', 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'id': 610, 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'synset': 'kite.n.03', 'synonyms': ['kite'], 'id': 611, 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'id': 612, 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'id': 613, 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'id': 614, 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'synset': 'knife.n.01', 'synonyms': ['knife'], 'id': 615, 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'id': 616, 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'synset': 'knob.n.02', 'synonyms': ['knob'], 'id': 617, 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'id': 618, 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'id': 619, 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'id': 620, 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'id': 621, 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'id': 622, 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'c', 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'id': 623, 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'f', 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'id': 624, 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'id': 625, 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'id': 626, 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'id': 627, 'def': 'a metal post supporting 
an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'id': 628, 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'id': 629, 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'id': 630, 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'id': 631, 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'id': 632, 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'f', 'synset': 'latch.n.02', 'synonyms': ['latch'], 'id': 633, 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'id': 634, 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'synset': 'leather.n.01', 'synonyms': ['leather'], 'id': 635, 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'id': 636, 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'id': 637, 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'r', 'synset': 'legume.n.02', 'synonyms': ['legume'], 'id': 638, 'def': 'the fruit or seed of bean or pea plants', 'name': 'legume'}, {'frequency': 'f', 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'id': 639, 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'id': 640, 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'id': 641, 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'id': 642, 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'id': 643, 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'id': 644, 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'id': 645, 'def': 'lightblub/source of light', 'name': 'lightbulb'}, {'frequency': 'r', 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'id': 646, 'def': 'a metallic conductor that is attached to a 
high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'f', 'synset': 'lime.n.06', 'synonyms': ['lime'], 'id': 647, 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'id': 648, 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'c', 'synset': 'lion.n.01', 'synonyms': ['lion'], 'id': 649, 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'id': 650, 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'r', 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'id': 651, 'def': 'liquor or beer', 'name': 'liquor'}, {'frequency': 'c', 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'id': 652, 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'f', 'synset': 'log.n.01', 'synonyms': ['log'], 'id': 653, 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'id': 654, 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'f', 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'id': 655, 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'id': 656, 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'id': 657, 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'id': 658, 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'id': 659, 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'c', 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'id': 660, 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'f', 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'id': 661, 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'synset': 'mallard.n.01', 'synonyms': ['mallard'], 'id': 662, 'def': 'wild dabbling duck from which domestic ducks are descended', 'name': 'mallard'}, {'frequency': 'r', 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'id': 663, 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'id': 664, 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'r', 'synset': 'manatee.n.01', 'synonyms': ['manatee'], 'id': 665, 'def': 'sirenian mammal of tropical coastal waters of America', 'name': 'manatee'}, {'frequency': 'c', 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'id': 666, 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'id': 667, 'def': 'a container (usually in a barn or stable) 
from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'id': 668, 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'f', 'synset': 'map.n.01', 'synonyms': ['map'], 'id': 669, 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'f', 'synset': 'marker.n.03', 'synonyms': ['marker'], 'id': 670, 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'synset': 'martini.n.01', 'synonyms': ['martini'], 'id': 671, 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'id': 672, 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'id': 673, 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'synset': 'masher.n.02', 'synonyms': ['masher'], 'id': 674, 'def': 'a kitchen utensil used for mashing (e.g. potatoes)', 'name': 'masher'}, {'frequency': 'f', 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'id': 675, 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'synset': 'mast.n.01', 'synonyms': ['mast'], 'id': 676, 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'id': 677, 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'id': 678, 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'id': 679, 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'id': 680, 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'id': 681, 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'id': 682, 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'id': 683, 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'c', 'synset': 'melon.n.01', 'synonyms': ['melon'], 'id': 684, 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'id': 685, 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'id': 686, 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'id': 687, 'def': 'kitchen appliance that 
cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'id': 688, 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'f', 'synset': 'milk.n.01', 'synonyms': ['milk'], 'id': 689, 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'r', 'synset': 'milk_can.n.01', 'synonyms': ['milk_can'], 'id': 690, 'def': 'can for transporting milk', 'name': 'milk_can'}, {'frequency': 'r', 'synset': 'milkshake.n.01', 'synonyms': ['milkshake'], 'id': 691, 'def': 'frothy drink of milk and flavoring and sometimes fruit or ice cream', 'name': 'milkshake'}, {'frequency': 'f', 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'id': 692, 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'id': 693, 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'id': 694, 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'id': 695, 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'id': 696, 'def': 'a kitchen utensil that is used for mixing foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'synset': 'money.n.03', 'synonyms': ['money'], 'id': 697, 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'id': 698, 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'id': 699, 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'synset': 'motor.n.01', 'synonyms': ['motor'], 'id': 700, 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'id': 701, 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'id': 702, 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'f', 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'id': 703, 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'id': 704, 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'f', 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'id': 705, 'def': 'a computer input device that controls an on-screen pointer (does not include trackpads / touchpads)', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'id': 706, 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 
'mousepad'}, {'frequency': 'c', 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'id': 707, 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'synset': 'mug.n.04', 'synonyms': ['mug'], 'id': 708, 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'id': 709, 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'id': 710, 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'c', 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'id': 711, 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'id': 712, 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'f', 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'id': 713, 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'id': 714, 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'id': 715, 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'id': 716, 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'c', 'synset': 'needle.n.03', 'synonyms': ['needle'], 'id': 717, 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'synset': 'nest.n.01', 'synonyms': ['nest'], 'id': 718, 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'f', 'synset': 'newspaper.n.01', 'synonyms': ['newspaper', 'paper_(newspaper)'], 'id': 719, 'def': 'a daily or weekly publication on folded sheets containing news, articles, and advertisements', 'name': 'newspaper'}, {'frequency': 'c', 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'id': 720, 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'id': 721, 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'id': 722, 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'c', 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'id': 723, 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'id': 724, 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'synset': 'notepad.n.01', 'synonyms': 
['notepad'], 'id': 725, 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'f', 'synset': 'nut.n.03', 'synonyms': ['nut'], 'id': 726, 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'id': 727, 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'f', 'synset': 'oar.n.01', 'synonyms': ['oar'], 'id': 728, 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'id': 729, 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'id': 730, 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'id': 731, 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'id': 732, 'def': 'oil from olives', 'name': 'olive_oil'}, {'frequency': 'r', 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'id': 733, 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'synset': 'onion.n.01', 'synonyms': ['onion'], 'id': 734, 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'id': 735, 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'id': 736, 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'c', 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'id': 737, 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'f', 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'id': 738, 'def': 'a thick standalone cushion used as a seat or footrest, often next to a chair', 'name': 'ottoman'}, {'frequency': 'f', 'synset': 'oven.n.01', 'synonyms': ['oven'], 'id': 739, 'def': 'kitchen appliance used for baking or roasting', 'name': 'oven'}, {'frequency': 'c', 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'id': 740, 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'synset': 'owl.n.01', 'synonyms': ['owl'], 'id': 741, 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'synset': 'packet.n.03', 'synonyms': ['packet'], 'id': 742, 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'id': 743, 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'synset': 'pad.n.04', 'synonyms': ['pad'], 'id': 744, 'def': 'mostly arm/knee pads labeled', 'name': 'pad'}, {'frequency': 'f', 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'id': 745, 'def': 'a short light oar used without an oarlock to propel a canoe or small 
boat', 'name': 'paddle'}, {'frequency': 'c', 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'id': 746, 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'c', 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'id': 747, 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'synset': 'painting.n.01', 'synonyms': ['painting'], 'id': 748, 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'f', 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'id': 749, 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'id': 750, 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'id': 751, 'def': 'cooking utensil consisting of a wide metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'id': 752, 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'id': 753, 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'id': 754, 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'id': 755, 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'f', 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'id': 756, 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'id': 757, 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'id': 758, 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'id': 759, 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'id': 760, 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'c', 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'id': 761, 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'id': 762, 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'c', 'synset': 'parasol.n.01', 'synonyms': ['parasol', 'sunshade'], 'id': 763, 'def': 'a handheld collapsible source of shade', 'name': 'parasol'}, {'frequency': 'r', 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'id': 764, 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'c', 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'id': 765, 
'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'id': 766, 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'id': 767, 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'id': 768, 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'id': 769, 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'c', 'synset': 'passport.n.02', 'synonyms': ['passport'], 'id': 770, 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home country', 'name': 'passport'}, {'frequency': 'f', 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'id': 771, 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'id': 772, 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'id': 773, 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'synset': 'peach.n.03', 'synonyms': ['peach'], 'id': 774, 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'id': 775, 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'f', 'synset': 'pear.n.01', 'synonyms': ['pear'], 'id': 776, 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'c', 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'id': 777, 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'synset': 'peg.n.04', 'synonyms': ['wooden_leg', 'pegleg'], 'id': 778, 'def': 'a prosthesis that replaces a missing leg', 'name': 'wooden_leg'}, {'frequency': 'r', 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'id': 779, 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'id': 780, 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'synset': 'pen.n.01', 'synonyms': ['pen'], 'id': 781, 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'f', 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'id': 782, 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'id': 783, 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'id': 784, 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'synset': 'pendulum.n.01', 'synonyms': 
['pendulum'], 'id': 785, 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'id': 786, 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'id': 787, 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'id': 788, 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'f', 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'id': 789, 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'id': 790, 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'id': 791, 'def': 'a toiletry that emits and diffuses a fragrant odor', 'name': 'perfume'}, {'frequency': 'r', 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'id': 792, 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'synset': 'person.n.01', 'synonyms': ['person', 'baby', 'child', 'boy', 'girl', 'man', 'woman', 'human'], 'id': 793, 'def': 'a human being', 'name': 'person'}, {'frequency': 'c', 'synset': 'pet.n.01', 'synonyms': ['pet'], 'id': 794, 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'c', 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'id': 795, 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'id': 796, 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'id': 797, 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'f', 'synset': 'piano.n.01', 'synonyms': ['piano'], 'id': 798, 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'id': 799, 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'id': 800, 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'synset': 'pie.n.01', 'synonyms': ['pie'], 'id': 801, 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'id': 802, 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'id': 803, 'def': "a child's coin bank (often shaped like a pig)", 'name': 
'piggy_bank'}, {'frequency': 'f', 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'id': 804, 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'id': 805, 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'id': 806, 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'id': 807, 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'id': 808, 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'id': 809, 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'id': 810, 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'id': 811, 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'id': 812, 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'c', 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'id': 813, 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'id': 814, 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'id': 815, 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'id': 816, 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. 
tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'id': 817, 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'synset': 'plate.n.04', 'synonyms': ['plate'], 'id': 818, 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'synset': 'platter.n.01', 'synonyms': ['platter'], 'id': 819, 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'id': 820, 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'id': 821, 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'id': 822, 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'synset': 'plume.n.02', 'synonyms': ['plume'], 'id': 823, 'def': 'a feather or cluster of feathers worn as an ornament', 'name': 'plume'}, {'frequency': 'r', 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'id': 824, 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'id': 825, 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'id': 826, 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'id': 827, 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'f', 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'id': 828, 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'id': 829, 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'synset': 'pony.n.05', 'synonyms': ['pony'], 'id': 830, 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'id': 831, 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'id': 832, 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'c', 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'id': 833, 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'id': 834, 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'id': 
835, 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'synset': 'pot.n.01', 'synonyms': ['pot'], 'id': 836, 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'id': 837, 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'synset': 'potato.n.01', 'synonyms': ['potato'], 'id': 838, 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'id': 839, 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'id': 840, 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'id': 841, 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'c', 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'id': 842, 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'id': 843, 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'c', 'synset': 'pretzel.n.01', 'synonyms': ['pretzel'], 'id': 844, 'def': 'glazed and salted cracker typically in the shape of a loose knot', 'name': 'pretzel'}, {'frequency': 'f', 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'id': 845, 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'id': 846, 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 'projectile_(weapon)'}, {'frequency': 'c', 'synset': 'projector.n.02', 'synonyms': ['projector'], 'id': 847, 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'id': 848, 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'synset': 'prune.n.01', 'synonyms': ['prune'], 'id': 849, 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'id': 850, 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'id': 851, 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'id': 852, 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'id': 853, 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'id': 854, 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'id': 855, 'def': 'a tool for 
making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'id': 856, 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'c', 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'id': 857, 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'id': 858, 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'id': 859, 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'id': 860, 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'id': 861, 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'id': 862, 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'id': 863, 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'synset': 'radar.n.01', 'synonyms': ['radar'], 'id': 864, 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'f', 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'id': 865, 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, {'frequency': 'c', 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'id': 866, 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'id': 867, 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'synset': 'raft.n.01', 'synonyms': ['raft'], 'id': 868, 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'id': 869, 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'id': 870, 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'id': 871, 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'id': 872, 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'synset': 'rat.n.01', 'synonyms': ['rat'], 'id': 873, 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'id': 874, 'def': 'a blade that has very sharp edge', 'name': 
'razorblade'}, {'frequency': 'c', 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'id': 875, 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'id': 876, 'def': 'vehicle mirror (side or rearview)', 'name': 'rearview_mirror'}, {'frequency': 'c', 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'id': 877, 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'id': 878, 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'c', 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'id': 879, 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'f', 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'id': 880, 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'id': 881, 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'id': 882, 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'id': 883, 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'c', 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'id': 884, 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'synset': 'ring.n.08', 'synonyms': ['ring'], 'id': 885, 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'id': 886, 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'id': 887, 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'synset': 'robe.n.01', 'synonyms': ['robe'], 'id': 888, 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'id': 889, 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'synset': 'rodent.n.01', 'synonyms': ['rodent'], 'id': 890, 'def': 'relatively small placental mammals having a single pair of constantly growing incisor teeth specialized for gnawing', 'name': 'rodent'}, {'frequency': 'r', 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'id': 891, 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'id': 892, 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'id': 893, 'def': 
'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'id': 894, 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'id': 895, 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'id': 896, 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'id': 897, 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'id': 898, 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'id': 899, 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'id': 900, 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'id': 901, 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'id': 902, 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'f', 'synset': 'sail.n.01', 'synonyms': ['sail'], 'id': 903, 'def': 'a large piece of fabric by means of which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'f', 'synset': 'salad.n.01', 'synonyms': ['salad'], 'id': 904, 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'id': 905, 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'c', 'synset': 'salami.n.01', 'synonyms': ['salami'], 'id': 906, 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'c', 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'id': 907, 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'id': 908, 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'c', 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'id': 909, 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'id': 910, 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'id': 911, 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, 
{'frequency': 'f', 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'id': 912, 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'id': 913, 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'id': 914, 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'id': 915, 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'id': 916, 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'id': 917, 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'id': 918, 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'id': 919, 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'id': 920, 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'id': 921, 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'id': 922, 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, {'frequency': 'f', 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'id': 923, 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'f', 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'id': 924, 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'r', 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'id': 925, 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'c', 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'id': 926, 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'f', 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'id': 927, 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'id': 928, 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'c', 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'id': 929, 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'c', 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'id': 930, 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'id': 931, 'def': 
'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'id': 932, 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'c', 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'id': 933, 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'c', 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'id': 934, 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'id': 935, 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'c', 'synset': 'shark.n.01', 'synonyms': ['shark'], 'id': 936, 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'id': 937, 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'id': 938, 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'synset': 'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'id': 939, 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'id': 940, 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'id': 941, 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'synset': 'shears.n.01', 'synonyms': ['shears'], 'id': 942, 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'id': 943, 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'id': 944, 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'id': 945, 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'c', 'synset': 'shield.n.02', 'synonyms': ['shield'], 'id': 946, 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'id': 947, 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'id': 948, 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'f', 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'id': 949, 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'id': 950, 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'synset': 'short_pants.n.01', 'synonyms': 
['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'id': 951, 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'id': 952, 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'f', 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'id': 953, 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'id': 954, 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'id': 955, 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'r', 'synset': 'shower_cap.n.01', 'synonyms': ['shower_cap'], 'id': 956, 'def': 'a tight cap worn to keep hair dry while showering', 'name': 'shower_cap'}, {'frequency': 'f', 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'id': 957, 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'id': 958, 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'f', 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'id': 959, 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'synset': 'silo.n.01', 'synonyms': ['silo'], 'id': 960, 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'synset': 'sink.n.01', 'synonyms': ['sink'], 'id': 961, 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'id': 962, 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'id': 963, 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'synset': 'ski.n.01', 'synonyms': ['ski'], 'id': 964, 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'id': 965, 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'id': 966, 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'id': 967, 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'id': 968, 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'r', 'synset': 'skullcap.n.01', 'synonyms': ['skullcap'], 'id': 969, 'def': 'rounded brimless cap fitting the crown of the head', 'name': 'skullcap'}, {'frequency': 'c', 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'id': 970, 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'synset': 'sleeping_bag.n.01', 'synonyms': 
['sleeping_bag'], 'id': 971, 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'id': 972, 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'id': 973, 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'id': 974, 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'id': 975, 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'id': 976, 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'id': 977, 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'id': 978, 'def': 'tracked vehicle for travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'synset': 'soap.n.01', 'synonyms': ['soap'], 'id': 979, 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'id': 980, 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'synset': 'sock.n.01', 'synonyms': ['sock'], 'id': 981, 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'f', 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'id': 982, 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'synset': 'softball.n.01', 'synonyms': ['softball'], 'id': 983, 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'id': 984, 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'id': 985, 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'f', 'synset': 'soup.n.01', 'synonyms': ['soup'], 'id': 986, 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'id': 987, 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'id': 988, 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'id': 989, 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 
'soybean_milk', 'soymilk'], 'id': 990, 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'id': 991, 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'id': 992, 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'id': 993, 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'id': 994, 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 'eyeglasses', 'glasses'], 'id': 995, 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'id': 996, 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'c', 'synset': 'spider.n.01', 'synonyms': ['spider'], 'id': 997, 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'r', 'synset': 'spiny_lobster.n.02', 'synonyms': ['crawfish', 'crayfish'], 'id': 998, 'def': 'large edible marine crustacean having a spiny carapace but lacking the large pincers of true lobsters', 'name': 'crawfish'}, {'frequency': 'c', 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'id': 999, 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'id': 1000, 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'id': 1001, 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'id': 1002, 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'synset': 'squid.n.01', 'synonyms': ['squid_(food)', 'calamari', 'calamary'], 'id': 1003, 'def': '(Italian cuisine) squid prepared as food', 'name': 'squid_(food)'}, {'frequency': 'c', 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'id': 1004, 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'r', 'synset': 'stagecoach.n.01', 'synonyms': ['stagecoach'], 'id': 1005, 'def': 'a large coach-and-four formerly used to carry passengers and mail on regular routes between towns', 'name': 'stagecoach'}, {'frequency': 'c', 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'id': 1006, 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'c', 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'id': 1007, 'def': 'echinoderms characterized 
by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'id': 1008, 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'id': 1009, 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'id': 1010, 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'f', 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'id': 1011, 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'id': 1012, 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'id': 1013, 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'id': 1014, 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'synset': 'stew.n.02', 'synonyms': ['stew'], 'id': 1015, 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'id': 1016, 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'id': 1017, 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'f', 'synset': 'stool.n.01', 'synonyms': ['stool'], 'id': 1018, 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'id': 1019, 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'id': 1020, 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'id': 1021, 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'id': 1022, 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'synset': 'strap.n.01', 'synonyms': ['strap'], 'id': 1023, 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'id': 1024, 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'id': 1025, 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'id': 1026, 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'id': 1027, 'def': 'a lamp 
supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'id': 1028, 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'id': 1029, 'def': 'a pointed tool for writing or drawing or engraving, including pens', 'name': 'stylus'}, {'frequency': 'r', 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'id': 1030, 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'id': 1031, 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'id': 1032, 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'f', 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'id': 1033, 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'id': 1034, 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'id': 1035, 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'id': 1036, 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'f', 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'id': 1037, 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'id': 1038, 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'synset': 'swab.n.02', 'synonyms': ['mop'], 'id': 1039, 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'id': 1040, 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'id': 1041, 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'id': 1042, 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'id': 1043, 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'id': 1044, 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'id': 1045, 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 
'c', 'synset': 'sword.n.01', 'synonyms': ['sword'], 'id': 1046, 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'id': 1047, 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'id': 1048, 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'id': 1049, 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'synset': 'table.n.02', 'synonyms': ['table'], 'id': 1050, 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'id': 1051, 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'id': 1052, 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'id': 1053, 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'synset': 'taco.n.02', 'synonyms': ['taco'], 'id': 1054, 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'synset': 'tag.n.02', 'synonyms': ['tag'], 'id': 1055, 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'id': 1056, 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'id': 1057, 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'id': 1058, 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'f', 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'id': 1059, 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'id': 1060, 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'f', 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'id': 1061, 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'id': 1062, 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'id': 1063, 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 
'id': 1064, 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'id': 1065, 'def': 'a cloth having a crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'id': 1066, 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'c', 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'id': 1067, 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'id': 1068, 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'id': 1069, 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'f', 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'id': 1070, 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'id': 1071, 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'id': 1072, 'def': 'electronic device for communicating by voice over long distances (includes wired and wireless/cell phones)', 'name': 'telephone'}, {'frequency': 'c', 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'id': 1073, 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'id': 1074, 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'id': 1075, 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'id': 1076, 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'id': 1077, 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'id': 1078, 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'id': 1079, 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'id': 1080, 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'id': 1081, 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'id': 1082, 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'f', 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'id': 1083, 'def': 'a regulator for automatically regulating 
temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'id': 1084, 'def': 'a small metal cap to protect the finger while sewing; can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'id': 1085, 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'id': 1086, 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'id': 1087, 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'id': 1088, 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'id': 1089, 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'id': 1090, 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'id': 1091, 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'c', 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'id': 1092, 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'id': 1093, 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'id': 1094, 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'id': 1095, 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'f', 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'id': 1096, 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'id': 1097, 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'id': 1098, 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'id': 1099, 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'f', 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'id': 1100, 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'id': 1101, 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'id': 1102, 'def': 'small brush; has 
long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'id': 1103, 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, {'frequency': 'f', 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'id': 1104, 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'f', 'synset': 'top.n.09', 'synonyms': ['cover'], 'id': 1105, 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'id': 1106, 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'id': 1107, 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'synset': 'towel.n.01', 'synonyms': ['towel'], 'id': 1108, 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'id': 1109, 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'synset': 'toy.n.03', 'synonyms': ['toy'], 'id': 1110, 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'id': 1111, 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'id': 1112, 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'c', 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'id': 1113, 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'f', 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'id': 1114, 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'id': 1115, 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'id': 1116, 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'synset': 'tray.n.01', 'synonyms': ['tray'], 'id': 1117, 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'id': 1118, 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'id': 1119, 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'c', 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'id': 
1120, 'def': 'a vehicle with three wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'f', 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'id': 1121, 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'id': 1122, 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'synset': 'truck.n.01', 'synonyms': ['truck'], 'id': 1123, 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'id': 1124, 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'id': 1125, 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'synset': 'tub.n.02', 'synonyms': ['vat'], 'id': 1126, 'def': 'a large vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'synset': 'turban.n.01', 'synonyms': ['turban'], 'id': 1127, 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'c', 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'id': 1128, 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'id': 1129, 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'id': 1130, 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'c', 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'id': 1131, 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'c', 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'id': 1132, 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'id': 1133, 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'f', 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'id': 1134, 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'id': 1135, 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'f', 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'id': 1136, 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'c', 'synset': 'urn.n.01', 'synonyms': ['urn'], 'id': 1137, 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'id': 1138, 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'f', 'synset': 'vase.n.01', 'synonyms': ['vase'], 'id': 1139, 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'synset': 
'vending_machine.n.01', 'synonyms': ['vending_machine'], 'id': 1140, 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'id': 1141, 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'f', 'synset': 'vest.n.01', 'synonyms': ['vest', 'waistcoat'], 'id': 1142, 'def': "a man's sleeveless garment worn underneath a coat", 'name': 'vest'}, {'frequency': 'c', 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'id': 1143, 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'id': 1144, 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'id': 1145, 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'id': 1146, 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'c', 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'id': 1147, 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'id': 1148, 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'id': 1149, 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'id': 1150, 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'id': 1151, 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'id': 1152, 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'id': 1153, 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'id': 1154, 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'id': 1155, 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'f', 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'id': 1156, 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'id': 1157, 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'id': 1158, 'def': 'a tall piece of furniture that provides storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'synset': 'washbasin.n.01', 'synonyms': ['washbasin', 
'basin_(for_washing)', 'washbowl', 'washstand', 'handbasin'], 'id': 1159, 'def': 'a bathroom sink that is permanently installed and connected to a water supply and drainpipe; where you can wash your hands and face', 'name': 'washbasin'}, {'frequency': 'c', 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'id': 1160, 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'id': 1161, 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'id': 1162, 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'id': 1163, 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'id': 1164, 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'id': 1165, 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'c', 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'id': 1166, 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'id': 1167, 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'id': 1168, 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'id': 1169, 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'id': 1170, 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'id': 1171, 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'f', 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'id': 1172, 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'id': 1173, 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'id': 1174, 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'id': 1175, 'def': 'a rich cake with two or more tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'id': 1176, 'def': 'a ring given to the bride and/or groom at 
the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'id': 1177, 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'id': 1178, 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'id': 1179, 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'id': 1180, 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'c', 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'id': 1181, 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'c', 'synset': 'wig.n.01', 'synonyms': ['wig'], 'id': 1182, 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'id': 1183, 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'id': 1184, 'def': 'A mill or turbine that is powered by wind', 'name': 'windmill'}, {'frequency': 'c', 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'id': 1185, 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'id': 1186, 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'id': 1187, 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'id': 1188, 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'c', 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'id': 1189, 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'id': 1190, 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'f', 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 'id': 1191, 'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'synset': 'wok.n.01', 'synonyms': ['wok'], 'id': 1192, 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'id': 1193, 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'id': 1194, 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'id': 1195, 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 
'name': 'wreath'}, {'frequency': 'c', 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'id': 1196, 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'f', 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'id': 1197, 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'id': 1198, 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'c', 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'id': 1199, 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'c', 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'id': 1200, 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'c', 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'id': 1201, 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'id': 1202, 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'id': 1203, 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa +# fmt: on diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/pascal_voc.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/pascal_voc.py new file mode 100644 index 0000000000000000000000000000000000000000..dbbf82cb96442bfa0cf05ed0f4dddf3645434b7e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/pascal_voc.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import numpy as np +import os +import xml.etree.ElementTree as ET +from typing import List, Tuple, Union + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from detectron2.utils.file_io import PathManager + +__all__ = ["load_voc_instances", "register_pascal_voc"] + + +# fmt: off +CLASS_NAMES = ( + "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", + "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", + "pottedplant", "sheep", "sofa", "train", "tvmonitor" +) +# fmt: on + + +def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): + """ + Load Pascal VOC detection annotations to Detectron2 format. + + Args: + dirname: Contain "Annotations", "ImageSets", "JPEGImages" + split (str): one of "train", "test", "val", "trainval" + class_names: list or tuple of class names + """ + with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: + fileids = np.loadtxt(f, dtype=np.str) + + # Needs to read many small annotation files. 
Makes sense at local + annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) + dicts = [] + for fileid in fileids: + anno_file = os.path.join(annotation_dirname, fileid + ".xml") + jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") + + with PathManager.open(anno_file) as f: + tree = ET.parse(f) + + r = { + "file_name": jpeg_file, + "image_id": fileid, + "height": int(tree.findall("./size/height")[0].text), + "width": int(tree.findall("./size/width")[0].text), + } + instances = [] + + for obj in tree.findall("object"): + cls = obj.find("name").text + # We include "difficult" samples in training. + # Based on limited experiments, they don't hurt accuracy. + # difficult = int(obj.find("difficult").text) + # if difficult == 1: + # continue + bbox = obj.find("bndbox") + bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] + # Original annotations are integers in the range [1, W or H] + # Assuming they mean 1-based pixel indices (inclusive), + # a box with annotation (xmin=1, xmax=W) covers the whole image. + # In coordinate space this is represented by (xmin=0, xmax=W) + bbox[0] -= 1.0 + bbox[1] -= 1.0 + instances.append( + {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} + ) + r["annotations"] = instances + dicts.append(r) + return dicts + + +def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): + DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) + MetadataCatalog.get(name).set( + thing_classes=list(class_names), dirname=dirname, year=year, split=split + ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/register_coco.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/register_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e564438d5bf016bcdbb65b4bbdc215d79f579f8a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/datasets/register_coco.py @@ -0,0 +1,3 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .coco import register_coco_instances # noqa +from .coco_panoptic import register_coco_panoptic_separated # noqa diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/detection_utils.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/detection_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b2b485ed937ea8ecc5c7efdf976a8d22306f5b57 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/detection_utils.py @@ -0,0 +1,605 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +Common data processing utilities that are used in a +typical object detection data pipeline. +""" +import logging +import numpy as np +import pycocotools.mask as mask_util +import torch +from PIL import Image + +from detectron2.structures import ( + BitMasks, + Boxes, + BoxMode, + Instances, + Keypoints, + PolygonMasks, + RotatedBoxes, + polygons_to_bitmask, +) +from detectron2.utils.file_io import PathManager + +from . 
import transforms as T +from .catalog import MetadataCatalog + +__all__ = [ + "SizeMismatchError", + "convert_image_to_rgb", + "check_image_size", + "transform_proposals", + "transform_instance_annotations", + "annotations_to_instances", + "annotations_to_instances_rotated", + "build_augmentation", + "build_transform_gen", + "create_keypoint_hflip_indices", + "filter_empty_instances", + "read_image", +] + + +class SizeMismatchError(ValueError): + """ + When loaded image has difference width/height compared with annotation. + """ + + +# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] +_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. + + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + + +def convert_image_to_rgb(image, format): + """ + Convert an image from given format to RGB. + + Args: + image (np.ndarray or Tensor): an HWC image + format (str): the format of input image, also see `read_image` + + Returns: + (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8 + """ + if isinstance(image, torch.Tensor): + image = image.cpu().numpy() + if format == "BGR": + image = image[:, :, [2, 1, 0]] + elif format == "YUV-BT.601": + image = np.dot(image, np.array(_M_YUV2RGB).T) + image = image * 255.0 + else: + if format == "L": + image = image[:, :, 0] + image = image.astype(np.uint8) + image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) + return image + + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. 
The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + try: + exif = image.getexif() + except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 + exif = None + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): + an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. + """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + return convert_PIL_to_numpy(image, format) + + +def check_image_size(dataset_dict, image): + """ + Raise an error if the image does not match the size specified in the dict. + """ + if "width" in dataset_dict or "height" in dataset_dict: + image_wh = (image.shape[1], image.shape[0]) + expected_wh = (dataset_dict["width"], dataset_dict["height"]) + if not image_wh == expected_wh: + raise SizeMismatchError( + "Mismatched image shape{}, got {}, expect {}.".format( + " for image " + dataset_dict["file_name"] + if "file_name" in dataset_dict + else "", + image_wh, + expected_wh, + ) + + " Please check the width/height in your annotation." + ) + + # To ensure bbox always remap to original image size + if "width" not in dataset_dict: + dataset_dict["width"] = image.shape[1] + if "height" not in dataset_dict: + dataset_dict["height"] = image.shape[0] + + +def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0): + """ + Apply transformations to the proposals in dataset_dict, if any. + + Args: + dataset_dict (dict): a dict read from the dataset, possibly + contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" + image_shape (tuple): height, width + transforms (TransformList): + proposal_topk (int): only keep top-K scoring proposals + min_box_size (int): proposals with either side smaller than this + threshold are removed + + The input dict is modified in-place, with abovementioned keys removed. A new + key "proposals" will be added. Its value is an `Instances` + object which contains the transformed proposals in its field + "proposal_boxes" and "objectness_logits". 
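As a rough usage sketch for `transform_proposals` (the proposal box, objectness logit, and image size below are made-up placeholders; this assumes a working detectron2 install and is not part of the diff itself):

```python
import numpy as np
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

# A dataset dict carrying one precomputed proposal in absolute XYXY coordinates.
dataset_dict = {
    "proposal_boxes": np.array([[10.0, 20.0, 200.0, 300.0]], dtype=np.float32),
    "proposal_objectness_logits": np.array([1.7], dtype=np.float32),
    "proposal_bbox_mode": BoxMode.XYXY_ABS,
}
# Apply a horizontal flip for an image of shape (H, W) = (480, 640).
tfms = T.TransformList([T.HFlipTransform(width=640)])
utils.transform_proposals(dataset_dict, (480, 640), tfms, proposal_topk=500)
# The three proposal_* keys are popped; dataset_dict["proposals"] is now an
# Instances object with fields "proposal_boxes" and "objectness_logits".
```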
+ """ + if "proposal_boxes" in dataset_dict: + # Transform proposal boxes + boxes = transforms.apply_box( + BoxMode.convert( + dataset_dict.pop("proposal_boxes"), + dataset_dict.pop("proposal_bbox_mode"), + BoxMode.XYXY_ABS, + ) + ) + boxes = Boxes(boxes) + objectness_logits = torch.as_tensor( + dataset_dict.pop("proposal_objectness_logits").astype("float32") + ) + + boxes.clip(image_shape) + keep = boxes.nonempty(threshold=min_box_size) + boxes = boxes[keep] + objectness_logits = objectness_logits[keep] + + proposals = Instances(image_shape) + proposals.proposal_boxes = boxes[:proposal_topk] + proposals.objectness_logits = objectness_logits[:proposal_topk] + dataset_dict["proposals"] = proposals + + +def transform_instance_annotations( + annotation, transforms, image_size, *, keypoint_hflip_indices=None +): + """ + Apply transforms to box, segmentation and keypoints annotations of a single instance. + + It will use `transforms.apply_box` for the box, and + `transforms.apply_coords` for segmentation polygons & keypoints. + If you need anything more specially designed for each data structure, + you'll need to implement your own version of this function or the transforms. + + Args: + annotation (dict): dict of instance annotations for a single instance. + It will be modified in-place. + transforms (TransformList or list[Transform]): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + + Returns: + dict: + the same input dict with fields "bbox", "segmentation", "keypoints" + transformed according to `transforms`. + The "bbox_mode" field will be set to XYXY_ABS. + """ + if isinstance(transforms, (tuple, list)): + transforms = T.TransformList(transforms) + # bbox is 1d (per-instance bounding box) + bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) + # clip transformed bbox to image size + bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0) + annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1]) + annotation["bbox_mode"] = BoxMode.XYXY_ABS + if "segmentation" in annotation: + # each instance contains 1 or more polygons + segm = annotation["segmentation"] + if isinstance(segm, list): + # polygons + polygons = [np.asarray(p).reshape(-1, 2) for p in segm] + annotation["segmentation"] = [ + p.reshape(-1) for p in transforms.apply_polygons(polygons) + ] + elif isinstance(segm, dict): + # RLE + mask = mask_util.decode(segm) + mask = transforms.apply_segmentation(mask) + assert tuple(mask.shape[:2]) == image_size + annotation["segmentation"] = mask + else: + raise ValueError( + "Cannot transform segmentation of type '{}'!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict.".format(type(segm)) + ) + if "keypoints" in annotation: + keypoints = transform_keypoint_annotations( + annotation["keypoints"], transforms, image_size, keypoint_hflip_indices + ) + annotation["keypoints"] = keypoints + return annotation + + +def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None): + """ + Transform keypoint annotations of an image. + If a keypoint is transformed out of image boundary, it will be marked "unlabeled" (visibility=0) + + Args: + keypoints (list[float]): Nx3 float in Detectron2's Dataset format. + Each point is represented by (x, y, visibility). 
+ transforms (TransformList): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + When `transforms` includes horizontal flip, will use the index + mapping to flip keypoints. + """ + # (N*3,) -> (N, 3) + keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3) + keypoints_xy = transforms.apply_coords(keypoints[:, :2]) + + # Set all out-of-boundary points to "unlabeled" + inside = (keypoints_xy >= np.array([0, 0])) & (keypoints_xy <= np.array(image_size[::-1])) + inside = inside.all(axis=1) + keypoints[:, :2] = keypoints_xy + keypoints[:, 2][~inside] = 0 + + # This assumes that HorizFlipTransform is the only one that does flip + do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 + + # Alternative way: check if probe points was horizontally flipped. + # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) + # probe_aug = transforms.apply_coords(probe.copy()) + # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa + + # If flipped, swap each keypoint with its opposite-handed equivalent + if do_hflip: + assert keypoint_hflip_indices is not None + keypoints = keypoints[keypoint_hflip_indices, :] + + # Maintain COCO convention that if visibility == 0 (unlabeled), then x, y = 0 + keypoints[keypoints[:, 2] == 0] = 0 + return keypoints + + +def annotations_to_instances(annos, image_size, mask_format="polygon"): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. + image_size (tuple): height, width + + Returns: + Instances: + It will contain fields "gt_boxes", "gt_classes", + "gt_masks", "gt_keypoints", if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] + target = Instances(image_size) + target.gt_boxes = Boxes(boxes) + boxes_feat = [BoxMode.convert(obj["bbox"]/4, obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] + target.gt_boxes_feat = Boxes(boxes_feat) + classes = [int(obj["category_id"]) for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + rec = [obj["rec"] for obj in annos] + rec = torch.tensor(rec, dtype=torch.int64) + target.rec = rec + + if len(annos) and "segmentation" in annos[0]: + segms = [obj["segmentation"] for obj in annos] + #segms_feat = [[obj["segmentation"][0]/4] for obj in annos] + if mask_format == "polygon": + try: + masks = PolygonMasks(segms) + #masks_feat = PolygonMasks(segms_feat) + except ValueError as e: + raise ValueError( + "Failed to use mask_format=='polygon' from the given annotations!" + ) from e + else: + assert mask_format == "bitmask", mask_format + masks = [] + for segm in segms: + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image_size)) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" 
+ "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a binary segmentation mask " + " in a 2D numpy array of shape HxW.".format(type(segm)) + ) + # torch.from_numpy does not support array with negative stride. + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) + ) + target.gt_masks = masks + #target.gt_masks_feat = masks_feat + if len(annos) and "keypoints" in annos[0]: + kpts = [obj.get("keypoints", []) for obj in annos] + target.gt_keypoints = Keypoints(kpts) + + return target + + +def annotations_to_instances_rotated(annos, image_size): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + Compared to `annotations_to_instances`, this function is for rotated boxes only + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. + image_size (tuple): height, width + + Returns: + Instances: + Containing fields "gt_boxes", "gt_classes", + if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = [obj["bbox"] for obj in annos] + target = Instances(image_size) + boxes = target.gt_boxes = RotatedBoxes(boxes) + boxes.clip(image_size) + + classes = [obj["category_id"] for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + return target + + +def filter_empty_instances(instances, by_box=True, by_mask=True, box_threshold=1e-5): + """ + Filter out empty instances in an `Instances` object. + + Args: + instances (Instances): + by_box (bool): whether to filter out instances with empty boxes + by_mask (bool): whether to filter out instances with empty masks + box_threshold (float): minimum width and height to be considered non-empty + + Returns: + Instances: the filtered instances. + """ + assert by_box or by_mask + r = [] + if by_box: + r.append(instances.gt_boxes.nonempty(threshold=box_threshold)) + if instances.has("gt_masks") and by_mask: + r.append(instances.gt_masks.nonempty()) + + # TODO: can also filter visible keypoints + + if not r: + return instances + m = r[0] + for x in r[1:]: + m = m & x + return instances[m] + + +def create_keypoint_hflip_indices(dataset_names): + """ + Args: + dataset_names (list[str]): list of dataset names + Returns: + ndarray[int]: a vector of size=#keypoints, storing the + horizontally-flipped keypoint indices. + """ + + check_metadata_consistency("keypoint_names", dataset_names) + check_metadata_consistency("keypoint_flip_map", dataset_names) + + meta = MetadataCatalog.get(dataset_names[0]) + names = meta.keypoint_names + # TODO flip -> hflip + flip_map = dict(meta.keypoint_flip_map) + flip_map.update({v: k for k, v in flip_map.items()}) + flipped_names = [i if i not in flip_map else flip_map[i] for i in names] + flip_indices = [names.index(i) for i in flipped_names] + return np.asarray(flip_indices, dtype=np.int32) + + +def gen_crop_transform_with_instance(crop_size, image_size, instance): + """ + Generate a CropTransform so that the cropping region contains + the center of the given instance. + + Args: + crop_size (tuple): h, w in pixels + image_size (tuple): h, w + instance (dict): an annotation dict of one instance, in Detectron2's + dataset format. 
+ """ + crop_size = np.asarray(crop_size, dtype=np.int32) + bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) + center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 + assert ( + image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] + ), "The annotation bounding box is outside of the image!" + assert ( + image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] + ), "Crop size is larger than image size!" + + min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) + max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) + max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) + + y0 = np.random.randint(min_yx[0], max_yx[0] + 1) + x0 = np.random.randint(min_yx[1], max_yx[1] + 1) + return T.CropTransform(x0, y0, crop_size[1], crop_size[0]) + + +def check_metadata_consistency(key, dataset_names): + """ + Check that the datasets have consistent metadata. + + Args: + key (str): a metadata key + dataset_names (list[str]): a list of dataset names + + Raises: + AttributeError: if the key does not exist in the metadata + ValueError: if the given datasets do not have the same metadata values defined by key + """ + if len(dataset_names) == 0: + return + logger = logging.getLogger(__name__) + entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names] + for idx, entry in enumerate(entries_per_dataset): + if entry != entries_per_dataset[0]: + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry)) + ) + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format( + key, dataset_names[0], str(entries_per_dataset[0]) + ) + ) + raise ValueError("Datasets have different metadata '{}'!".format(key)) + + +def build_augmentation(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + + Returns: + list[Augmentation] + """ + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + sample_style = "choice" + augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)] + if is_train and cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + return augmentation + + +build_transform_gen = build_augmentation +""" +Alias for backward-compatibility. +""" diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4bacd895756cedbc9b37fe24af6dbcd8a054246b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler +from .grouped_batch_sampler import GroupedBatchSampler + +__all__ = [ + "GroupedBatchSampler", + "TrainingSampler", + "InferenceSampler", + "RepeatFactorTrainingSampler", +] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/distributed_sampler.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/distributed_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..f0e8da28822ec071a04caaac2069e6148bc90622 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/distributed_sampler.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import math +from collections import defaultdict +from typing import Optional +import torch +from torch.utils.data.sampler import Sampler + +from detectron2.utils import comm + + +class TrainingSampler(Sampler): + """ + In training, we only care about the "infinite stream" of training data. + So this sampler produces an infinite stream of indices and + all workers cooperate to correctly shuffle the indices and sample different indices. + + The samplers in each worker effectively produces `indices[worker_id::num_workers]` + where `indices` is an infinite stream of indices consisting of + `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) + or `range(size) + range(size) + ...` (if shuffle is False) + """ + + def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + """ + self._size = size + assert size > 0 + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + if self._shuffle: + yield from torch.randperm(self._size, generator=g).tolist() + else: + yield from torch.arange(self._size).tolist() + + +class RepeatFactorTrainingSampler(Sampler): + """ + Similar to TrainingSampler, but a sample may appear more times than others based + on its "repeat factor". This is suitable for training on class imbalanced datasets like LVIS. + """ + + def __init__(self, repeat_factors, *, shuffle=True, seed=None): + """ + Args: + repeat_factors (Tensor): a float vector, the repeat factor for each indice. When it's + full of ones, it is equivalent to ``TrainingSampler(len(repeat_factors), ...)``. + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + """ + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + # Split into whole number (_int_part) and fractional (_frac_part) parts. 
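+        # e.g. a repeat factor of 2.3 gives each epoch 2 guaranteed copies of that image
+        # plus a 30% chance of a third copy (stochastic rounding in `_get_epoch_indices`).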
+ self._int_part = torch.trunc(repeat_factors) + self._frac_part = repeat_factors - self._int_part + + @staticmethod + def repeat_factors_from_category_frequency(dataset_dicts, repeat_thresh): + """ + Compute (fractional) per-image repeat factors based on category frequency. + The repeat factor for an image is a function of the frequency of the rarest + category labeled in that image. The "frequency of category c" in [0, 1] is defined + as the fraction of images in the training set (without repeats) in which category c + appears. + See :paper:`lvis` (>= v2) Appendix B.2. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 dataset format. + repeat_thresh (float): frequency threshold below which data is repeated. + If the frequency is half of `repeat_thresh`, the image will be + repeated twice. + + Returns: + torch.Tensor: + the i-th element is the repeat factor for the dataset image at index i. + """ + # 1. For each category c, compute the fraction of images that contain it: f(c) + category_freq = defaultdict(int) + for dataset_dict in dataset_dicts: # For each image (without repeats) + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + for cat_id in cat_ids: + category_freq[cat_id] += 1 + num_images = len(dataset_dicts) + for k, v in category_freq.items(): + category_freq[k] = v / num_images + + # 2. For each category c, compute the category-level repeat factor: + # r(c) = max(1, sqrt(t / f(c))) + category_rep = { + cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) + for cat_id, cat_freq in category_freq.items() + } + + # 3. For each image I, compute the image-level repeat factor: + # r(I) = max_{c in I} r(c) + rep_factors = [] + for dataset_dict in dataset_dicts: + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}, default=1.0) + rep_factors.append(rep_factor) + + return torch.tensor(rep_factors, dtype=torch.float32) + + def _get_epoch_indices(self, generator): + """ + Create a list of dataset indices (with repeats) to use for one epoch. + + Args: + generator (torch.Generator): pseudo random number generator used for + stochastic rounding. + + Returns: + torch.Tensor: list of dataset indices to use in one epoch. Each index + is repeated based on its calculated repeat factor. + """ + # Since repeat factors are fractional, we use stochastic rounding so + # that the target repeat factor is achieved in expectation over the + # course of training + rands = torch.rand(len(self._frac_part), generator=generator) + rep_factors = self._int_part + (rands < self._frac_part).float() + # Construct a list of indices in which we repeat images as specified + indices = [] + for dataset_index, rep_factor in enumerate(rep_factors): + indices.extend([dataset_index] * int(rep_factor.item())) + return torch.tensor(indices, dtype=torch.int64) + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + # Sample indices with repeats determined by stochastic rounding; each + # "epoch" may have a slightly different size due to the rounding. + indices = self._get_epoch_indices(g) + if self._shuffle: + randperm = torch.randperm(len(indices), generator=g) + yield from indices[randperm].tolist() + else: + yield from indices.tolist() + + +class InferenceSampler(Sampler): + """ + Produce indices for inference across all workers. 
+ Inference needs to run on the __exact__ set of samples, + therefore when the total number of samples is not divisible by the number of workers, + this sampler produces different number of samples on different workers. + """ + + def __init__(self, size: int): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + """ + self._size = size + assert size > 0 + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + shard_size = (self._size - 1) // self._world_size + 1 + begin = shard_size * self._rank + end = min(shard_size * (self._rank + 1), self._size) + self._local_indices = range(begin, end) + + def __iter__(self): + yield from self._local_indices + + def __len__(self): + return len(self._local_indices) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/grouped_batch_sampler.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/grouped_batch_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..5b247730aacd04dd0c752664acde3257c4eddd71 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/samplers/grouped_batch_sampler.py @@ -0,0 +1,47 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from torch.utils.data.sampler import BatchSampler, Sampler + + +class GroupedBatchSampler(BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + It enforces that the batch only contain elements from the same group. + It also tries to provide mini-batches which follows an ordering which is + as close as possible to the ordering from the original sampler. + """ + + def __init__(self, sampler, group_ids, batch_size): + """ + Args: + sampler (Sampler): Base sampler. + group_ids (list[int]): If the sampler produces indices in range [0, N), + `group_ids` must be a list of `N` ints which contains the group id of each sample. + The group ids must be a set of integers in the range [0, num_groups). + batch_size (int): Size of mini-batch. + """ + if not isinstance(sampler, Sampler): + raise ValueError( + "sampler should be an instance of " + "torch.utils.data.Sampler, but got sampler={}".format(sampler) + ) + self.sampler = sampler + self.group_ids = np.asarray(group_ids) + assert self.group_ids.ndim == 1 + self.batch_size = batch_size + groups = np.unique(self.group_ids).tolist() + + # buffer the indices of each group until batch size is reached + self.buffer_per_group = {k: [] for k in groups} + + def __iter__(self): + for idx in self.sampler: + group_id = self.group_ids[idx] + group_buffer = self.buffer_per_group[group_id] + group_buffer.append(idx) + if len(group_buffer) == self.batch_size: + yield group_buffer[:] # yield a copy of the list + del group_buffer[:] + + def __len__(self): + raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ab3c63b5b456a7fb878757e25768a3634f76ae5b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
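+# A minimal usage sketch of the augmentation API re-exported below (illustrative
+# only; the dummy `image` array is an assumption, not real data):
+#
+#   import numpy as np
+#   import detectron2.data.transforms as T
+#
+#   image = np.zeros((480, 640, 3), dtype=np.uint8)
+#   augs = T.AugmentationList(
+#       [T.ResizeShortestEdge([400, 500], max_size=800), T.RandomFlip(prob=0.5)]
+#   )
+#   aug_input = T.AugInput(image)
+#   tfms = augs(aug_input)          # mutates aug_input.image in place
+#   augmented = aug_input.image     # `tfms` can re-apply the same ops to other data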
+from fvcore.transforms.transform import Transform, TransformList # order them first +from fvcore.transforms.transform import * +from .transform import * +from .augmentation import * +from .augmentation_impl import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/augmentation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..917290bf634b4bbd21fd70fbc14dabd6705fea33 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/augmentation.py @@ -0,0 +1,377 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import inspect +import numpy as np +import pprint +from typing import Any, List, Optional, Tuple, Union +from fvcore.transforms.transform import Transform, TransformList + +""" +See "Data Augmentation" tutorial for an overview of the system: +https://detectron2.readthedocs.io/tutorials/augmentation.html +""" + + +__all__ = [ + "Augmentation", + "AugmentationList", + "AugInput", + "TransformGen", + "apply_transform_gens", + "StandardAugInput", + "apply_augmentations", +] + + +def _check_img_dtype(img): + assert isinstance(img, np.ndarray), "[Augmentation] Needs an numpy array, but got a {}!".format( + type(img) + ) + assert not isinstance(img.dtype, np.integer) or ( + img.dtype == np.uint8 + ), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format( + img.dtype + ) + assert img.ndim in [2, 3], img.ndim + + +def _get_aug_input_args(aug, aug_input) -> List[Any]: + """ + Get the arguments to be passed to ``aug.get_transform`` from the input ``aug_input``. + """ + if aug.input_args is None: + # Decide what attributes are needed automatically + prms = list(inspect.signature(aug.get_transform).parameters.items()) + # The default behavior is: if there is one parameter, then its "image" + # (work automatically for majority of use cases, and also avoid BC breaking), + # Otherwise, use the argument names. + if len(prms) == 1: + names = ("image",) + else: + names = [] + for name, prm in prms: + if prm.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + raise TypeError( + f""" \ +The default implementation of `{type(aug)}.__call__` does not allow \ +`{type(aug)}.get_transform` to use variable-length arguments (*args, **kwargs)! \ +If arguments are unknown, reimplement `__call__` instead. \ +""" + ) + names.append(name) + aug.input_args = tuple(names) + + args = [] + for f in aug.input_args: + try: + args.append(getattr(aug_input, f)) + except AttributeError as e: + raise AttributeError( + f"{type(aug)}.get_transform needs input attribute '{f}', " + f"but it is not an attribute of {type(aug_input)}!" + ) from e + return args + + +class Augmentation: + """ + Augmentation defines (often random) policies/strategies to generate :class:`Transform` + from data. It is often used for pre-processing of input data. + + A "policy" that generates a :class:`Transform` may, in the most general case, + need arbitrary information from input data in order to determine what transforms + to apply. Therefore, each :class:`Augmentation` instance defines the arguments + needed by its :meth:`get_transform` method. 
When called with the positional arguments, + the :meth:`get_transform` method executes the policy. + + Note that :class:`Augmentation` defines the policies to create a :class:`Transform`, + but not how to execute the actual transform operations to those data. + Its :meth:`__call__` method will use :meth:`AugInput.transform` to execute the transform. + + The returned `Transform` object is meant to describe deterministic transformation, which means + it can be re-applied on associated data, e.g. the geometry of an image and its segmentation + masks need to be transformed together. + (If such re-application is not needed, then determinism is not a crucial requirement.) + """ + + input_args: Optional[Tuple[str]] = None + """ + Stores the attribute names needed by :meth:`get_transform`, e.g. ``("image", "sem_seg")``. + By default, it is just a tuple of argument names in :meth:`self.get_transform`, which often only + contain "image". As long as the argument name convention is followed, there is no need for + users to touch this attribute. + """ + + def _init(self, params=None): + if params: + for k, v in params.items(): + if k != "self" and not k.startswith("_"): + setattr(self, k, v) + + def get_transform(self, *args) -> Transform: + """ + Execute the policy based on input data, and decide what transform to apply to inputs. + + Args: + args: Any fixed-length positional arguments. By default, the name of the arguments + should exist in the :class:`AugInput` to be used. + + Returns: + Transform: Returns the deterministic transform to apply to the input. + + Examples: + :: + class MyAug: + # if a policy needs to know both image and semantic segmentation + def get_transform(image, sem_seg) -> T.Transform: + pass + tfm: Transform = MyAug().get_transform(image, sem_seg) + new_image = tfm.apply_image(image) + + Notes: + Users can freely use arbitrary new argument names in custom + :meth:`get_transform` method, as long as they are available in the + input data. In detectron2 we use the following convention: + + * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. + * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes + of N instances. Each is in XYXY format in unit of absolute coordinates. + * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel. + + We do not specify convention for other types and do not include builtin + :class:`Augmentation` that uses other types in detectron2. + """ + raise NotImplementedError + + def __call__(self, aug_input) -> Transform: + """ + Augment the given `aug_input` **in-place**, and return the transform that's used. + + This method will be called to apply the augmentation. In most augmentation, it + is enough to use the default implementation, which calls :meth:`get_transform` + using the inputs. But a subclass can overwrite it to have more complicated logic. + + Args: + aug_input (AugInput): an object that has attributes needed by this augmentation + (defined by ``self.get_transform``). Its ``transform`` method will be called + to in-place transform it. + + Returns: + Transform: the transform that is applied on the input. + """ + args = _get_aug_input_args(self, aug_input) + tfm = self.get_transform(*args) + assert isinstance(tfm, (Transform, TransformList)), ( + f"{type(self)}.get_transform must return an instance of Transform! " + "Got {type(tfm)} instead." 
+ ) + aug_input.transform(tfm) + return tfm + + def _rand_range(self, low=1.0, high=None, size=None): + """ + Uniform float random number between low and high. + """ + if high is None: + low, high = 0, low + if size is None: + size = [] + return np.random.uniform(low, high, size) + + def __repr__(self): + """ + Produce something like: + "MyAugmentation(field1={self.field1}, field2={self.field2})" + """ + try: + sig = inspect.signature(self.__init__) + classname = type(self).__name__ + argstr = [] + for name, param in sig.parameters.items(): + assert ( + param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD + ), "The default __repr__ doesn't support *args or **kwargs" + assert hasattr(self, name), ( + "Attribute {} not found! " + "Default __repr__ only works if attributes match the constructor.".format(name) + ) + attr = getattr(self, name) + default = param.default + if default is attr: + continue + attr_str = pprint.pformat(attr) + if "\n" in attr_str: + # don't show it if pformat decides to use >1 lines + attr_str = "..." + argstr.append("{}={}".format(name, attr_str)) + return "{}({})".format(classname, ", ".join(argstr)) + except AssertionError: + return super().__repr__() + + __str__ = __repr__ + + +def _transform_to_aug(tfm_or_aug): + """ + Wrap Transform into Augmentation. + Private, used internally to implement augmentations. + """ + assert isinstance(tfm_or_aug, (Transform, Augmentation)), tfm_or_aug + if isinstance(tfm_or_aug, Augmentation): + return tfm_or_aug + else: + + class _TransformToAug(Augmentation): + def __init__(self, tfm: Transform): + self.tfm = tfm + + def get_transform(self, *args): + return self.tfm + + def __repr__(self): + return repr(self.tfm) + + __str__ = __repr__ + + return _TransformToAug(tfm_or_aug) + + +class AugmentationList(Augmentation): + """ + Apply a sequence of augmentations. + + It has ``__call__`` method to apply the augmentations. + + Note that :meth:`get_transform` method is impossible (will throw error if called) + for :class:`AugmentationList`, because in order to apply a sequence of augmentations, + the kth augmentation must be applied first, to provide inputs needed by the (k+1)th + augmentation. + """ + + def __init__(self, augs): + """ + Args: + augs (list[Augmentation or Transform]): + """ + super().__init__() + self.augs = [_transform_to_aug(x) for x in augs] + + def __call__(self, aug_input) -> Transform: + tfms = [] + for x in self.augs: + tfm = x(aug_input) + tfms.append(tfm) + return TransformList(tfms) + + def __repr__(self): + msgs = [str(x) for x in self.augs] + return "AugmentationList[{}]".format(", ".join(msgs)) + + __str__ = __repr__ + + +class AugInput: + """ + Input that can be used with :meth:`Augmentation.__call__`. + This is a standard implementation for the majority of use cases. + This class provides the standard attributes **"image", "boxes", "sem_seg"** + defined in :meth:`__init__` and they may be needed by different augmentations. + Most augmentation policies do not need attributes beyond these three. + + After applying augmentations to these attributes (using :meth:`AugInput.transform`), + the returned transforms can then be used to transform other data structures that users have. 
+ + Examples: + :: + input = AugInput(image, boxes=boxes) + tfms = augmentation(input) + transformed_image = input.image + transformed_boxes = input.boxes + transformed_other_data = tfms.apply_other(other_data) + + An extended project that works with new data types may implement augmentation policies + that need other inputs. An algorithm may need to transform inputs in a way different + from the standard approach defined in this class. In those rare situations, users can + implement a class similar to this class, that satify the following condition: + + * The input must provide access to these data in the form of attribute access + (``getattr``). For example, if an :class:`Augmentation` to be applied needs "image" + and "sem_seg" arguments, its input must have the attribute "image" and "sem_seg". + * The input must have a ``transform(tfm: Transform) -> None`` method which + in-place transforms all its attributes. + """ + + # TODO maybe should support more builtin data types here + def __init__( + self, + image: np.ndarray, + *, + boxes: Optional[np.ndarray] = None, + sem_seg: Optional[np.ndarray] = None, + ): + """ + Args: + image (ndarray): (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. The meaning of C is up + to users. + boxes (ndarray or None): Nx4 float32 boxes in XYXY_ABS mode + sem_seg (ndarray or None): HxW uint8 semantic segmentation mask. Each element + is an integer label of pixel. + """ + _check_img_dtype(image) + self.image = image + self.boxes = boxes + self.sem_seg = sem_seg + + def transform(self, tfm: Transform) -> None: + """ + In-place transform all attributes of this class. + + By "in-place", it means after calling this method, accessing an attribute such + as ``self.image`` will return transformed data. + """ + self.image = tfm.apply_image(self.image) + if self.boxes is not None: + self.boxes = tfm.apply_box(self.boxes) + if self.sem_seg is not None: + self.sem_seg = tfm.apply_segmentation(self.sem_seg) + + def apply_augmentations( + self, augmentations: List[Union[Augmentation, Transform]] + ) -> TransformList: + """ + Equivalent of ``AugmentationList(augmentations)(self)`` + """ + return AugmentationList(augmentations)(self) + + +def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs): + """ + Use ``T.AugmentationList(augmentations)(inputs)`` instead. + """ + if isinstance(inputs, np.ndarray): + # handle the common case of image-only Augmentation, also for backward compatibility + image_only = True + inputs = AugInput(inputs) + else: + image_only = False + tfms = inputs.apply_augmentations(augmentations) + return inputs.image if image_only else inputs, tfms + + +apply_transform_gens = apply_augmentations +""" +Alias for backward-compatibility. +""" + +TransformGen = Augmentation +""" +Alias for Augmentation, since it is something that generates :class:`Transform`s +""" + +StandardAugInput = AugInput +""" +Alias for compatibility. It's not worth the complexity to have two classes. +""" diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/augmentation_impl.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/augmentation_impl.py new file mode 100644 index 0000000000000000000000000000000000000000..2d04b2d16a41873e57f809d0aa57cea39c1432c9 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/augmentation_impl.py @@ -0,0 +1,579 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. 
and its affiliates. +""" +Implement many useful :class:`Augmentation`. +""" +import numpy as np +import sys +from fvcore.transforms.transform import ( + BlendTransform, + CropTransform, + HFlipTransform, + NoOpTransform, + VFlipTransform, +) +from PIL import Image +import random +from .augmentation import Augmentation, _transform_to_aug +from .transform import ExtentTransform, ResizeTransform, RotationTransform + +__all__ = [ + "RandomApply", + "RandomBrightness", + "RandomContrast", + "RandomCrop", + "RandomExtent", + "RandomFlip", + "RandomSaturation", + "RandomLighting", + "RandomRotation", + "Resize", + "ResizeShortestEdge", + "RandomCrop_CategoryAreaConstraint", + "RandomCropWithInstance", +] + + +class RandomApply(Augmentation): + """ + Randomly apply an augmentation with a given probability. + """ + + def __init__(self, tfm_or_aug, prob=0.5): + """ + Args: + tfm_or_aug (Transform, Augmentation): the transform or augmentation + to be applied. It can either be a `Transform` or `Augmentation` + instance. + prob (float): probability between 0.0 and 1.0 that + the wrapper transformation is applied + """ + super().__init__() + self.aug = _transform_to_aug(tfm_or_aug) + assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" + self.prob = prob + + def get_transform(self, *args): + do = self._rand_range() < self.prob + if do: + return self.aug.get_transform(*args) + else: + return NoOpTransform() + + def __call__(self, aug_input): + do = self._rand_range() < self.prob + if do: + return self.aug(aug_input) + else: + return NoOpTransform() + + +class RandomFlip(Augmentation): + """ + Flip the image horizontally or vertically with the given probability. + """ + + def __init__(self, prob=0.5, *, horizontal=True, vertical=False): + """ + Args: + prob (float): probability of flip. + horizontal (boolean): whether to apply horizontal flipping + vertical (boolean): whether to apply vertical flipping + """ + super().__init__() + + if horizontal and vertical: + raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") + if not horizontal and not vertical: + raise ValueError("At least one of horiz or vert has to be True!") + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + do = self._rand_range() < self.prob + if do: + if self.horizontal: + return HFlipTransform(w) + elif self.vertical: + return VFlipTransform(h) + else: + return NoOpTransform() + + +class Resize(Augmentation): + """ Resize image to a fixed target size""" + + def __init__(self, shape, interp=Image.BILINEAR): + """ + Args: + shape: (h, w) tuple or a int + interp: PIL interpolation method + """ + if isinstance(shape, int): + shape = (shape, shape) + shape = tuple(shape) + self._init(locals()) + + def get_transform(self, image): + return ResizeTransform( + image.shape[0], image.shape[1], self.shape[0], self.shape[1], self.interp + ) + + +class ResizeShortestEdge(Augmentation): + """ + Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. + If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. + """ + + def __init__( + self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR + ): + """ + Args: + short_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. + If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. 
+ max_size (int): maximum allowed longest edge length. + sample_style (str): either "range" or "choice". + """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + self.is_range = sample_style == "range" + if isinstance(short_edge_length, int): + short_edge_length = (short_edge_length, short_edge_length) + if self.is_range: + assert len(short_edge_length) == 2, ( + "short_edge_length must be two values using 'range' sample style." + f" Got {short_edge_length}!" + ) + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + if self.is_range: + size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) + else: + size = np.random.choice(self.short_edge_length) + if size == 0: + return NoOpTransform() + scale = size * 1.0 / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + if max(newh, neww) > self.max_size: + scale = self.max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + return ResizeTransform(h, w, newh, neww, self.interp) + + +class RandomRotation(Augmentation): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around the given center. + """ + + def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None): + """ + Args: + angle (list[float]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the angle (in degrees). + If ``sample_style=="choice"``, a list of angles to sample from + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (list[[float, float]]): If ``sample_style=="range"``, + a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, + [0, 0] being the top left of the image and [1, 1] the bottom right. + If ``sample_style=="choice"``, a list of centers to sample from + Default: None, which means that the center of rotation is the center of the image + center has no effect if expand=True because it only affects shifting + """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + self.is_range = sample_style == "range" + if isinstance(angle, (float, int)): + angle = (angle, angle) + if center is not None and isinstance(center[0], (float, int)): + center = (center, center) + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + center = None + if self.is_range: + angle = np.random.uniform(self.angle[0], self.angle[1]) + if self.center is not None: + center = ( + np.random.uniform(self.center[0][0], self.center[1][0]), + np.random.uniform(self.center[0][1], self.center[1][1]), + ) + else: + angle = np.random.choice(self.angle) + if self.center is not None: + center = np.random.choice(self.center) + + if center is not None: + center = (w * center[0], h * center[1]) # Convert to absolute coordinates + + if angle % 360 == 0: + return NoOpTransform() + + return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) + + +class RandomCrop(Augmentation): + """ + Randomly crop a rectangle region out of an image. + """ + + def __init__(self, crop_type: str, crop_size): + """ + Args: + crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range". + crop_size (tuple[float, float]): two floats, explained below. 
+ + - "relative": crop a (H * crop_size[0], W * crop_size[1]) region from an input image of + size (H, W). crop size should be in (0, 1] + - "relative_range": uniformly sample two values from [crop_size[0], 1] + and [crop_size[1]], 1], and use them as in "relative" crop type. + - "absolute" crop a (crop_size[0], crop_size[1]) region from input image. + crop_size must be smaller than the input image size. + - "absolute_range", for an input of size (H, W), uniformly sample H_crop in + [crop_size[0], min(H, crop_size[1])] and W_crop in [crop_size[0], min(W, crop_size[1])]. + Then crop a region (H_crop, W_crop). + """ + # TODO style of relative_range and absolute_range are not consistent: + # one takes (h, w) but another takes (min, max) + super().__init__() + assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"] + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) + h0 = np.random.randint(h - croph + 1) + w0 = np.random.randint(w - cropw + 1) + return CropTransform(w0, h0, cropw, croph) + + def get_crop_size(self, image_size): + """ + Args: + image_size (tuple): height, width + + Returns: + crop_size (tuple): height, width in absolute pixels + """ + h, w = image_size + if self.crop_type == "relative": + ch, cw = self.crop_size + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "relative_range": + crop_size = np.asarray(self.crop_size, dtype=np.float32) + ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "absolute": + return (min(self.crop_size[0], h), min(self.crop_size[1], w)) + elif self.crop_type == "absolute_range": + assert self.crop_size[0] <= self.crop_size[1] + ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1) + cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1) + return ch, cw + else: + NotImplementedError("Unknown crop type {}".format(self.crop_type)) + + +class RandomCrop_CategoryAreaConstraint(Augmentation): + """ + Similar to :class:`RandomCrop`, but find a cropping window such that no single category + occupies a ratio of more than `single_category_max_area` in semantic segmentation ground + truth, which can cause unstability in training. The function attempts to find such a valid + cropping window for at most 10 times. + """ + + def __init__( + self, + crop_type: str, + crop_size, + single_category_max_area: float = 1.0, + ignored_category: int = None, + ): + """ + Args: + crop_type, crop_size: same as in :class:`RandomCrop` + single_category_max_area: the maximum allowed area ratio of a + category. Set to 1.0 to disable + ignored_category: allow this category in the semantic segmentation + ground truth to exceed the area ratio. Usually set to the category + that's ignored in training. 
+ """ + self.crop_aug = RandomCrop(crop_type, crop_size) + self._init(locals()) + + def get_transform(self, image, sem_seg): + if self.single_category_max_area >= 1.0: + return self.crop_aug.get_transform(image) + else: + h, w = sem_seg.shape + for _ in range(10): + crop_size = self.crop_aug.get_crop_size((h, w)) + y0 = np.random.randint(h - crop_size[0] + 1) + x0 = np.random.randint(w - crop_size[1] + 1) + sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]] + labels, cnt = np.unique(sem_seg_temp, return_counts=True) + if self.ignored_category is not None: + cnt = cnt[labels != self.ignored_category] + if len(cnt) > 1 and np.max(cnt) < np.sum(cnt) * self.single_category_max_area: + break + crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0]) + return crop_tfm + + +class RandomExtent(Augmentation): + """ + Outputs an image by cropping a random "subrect" of the source image. + + The subrect can be parameterized to include pixels outside the source image, + in which case they will be set to zeros (i.e. black). The size of the output + image will vary with the size of the random subrect. + """ + + def __init__(self, scale_range, shift_range): + """ + Args: + output_size (h, w): Dimensions of output image + scale_range (l, h): Range of input-to-output size scaling factor + shift_range (x, y): Range of shifts of the cropped subrect. The rect + is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], + where (w, h) is the (width, height) of the input image. Set each + component to zero to crop at the image's center. + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + img_h, img_w = image.shape[:2] + + # Initialize src_rect to fit the input image. + src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) + + # Apply a random scaling to the src_rect. + src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) + + # Apply a random shift to the coordinates origin. + src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) + src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) + + # Map src_rect coordinates into image coordinates (center at corner). + src_rect[0::2] += 0.5 * img_w + src_rect[1::2] += 0.5 * img_h + + return ExtentTransform( + src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), + output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])), + ) + + +class RandomContrast(Augmentation): + """ + Randomly transforms image contrast. + + Contrast intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce contrast + - intensity = 1 will preserve the input image + - intensity > 1 will increase contrast + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=image.mean(), src_weight=1 - w, dst_weight=w) + + +class RandomBrightness(Augmentation): + """ + Randomly transforms image brightness. + + Brightness intensity is uniformly sampled in (intensity_min, intensity_max). 
+ - intensity < 1 will reduce brightness + - intensity = 1 will preserve the input image + - intensity > 1 will increase brightness + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) + + +class RandomSaturation(Augmentation): + """ + Randomly transforms saturation of an RGB image. + Input images are assumed to have 'RGB' channel order. + + Saturation intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce saturation (make the image more grayscale) + - intensity = 1 will preserve the input image + - intensity > 1 will increase saturation + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation (1 preserves input). + intensity_max (float): Maximum augmentation (1 preserves input). + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + assert image.shape[-1] == 3, "RandomSaturation only works on RGB images" + w = np.random.uniform(self.intensity_min, self.intensity_max) + grayscale = image.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] + return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) + + +class RandomLighting(Augmentation): + """ + The "lighting" augmentation described in AlexNet, using fixed PCA over ImageNet. + Input images are assumed to have 'RGB' channel order. + + The degree of color jittering is randomly sampled via a normal distribution, + with standard deviation given by the scale parameter. + """ + + def __init__(self, scale): + """ + Args: + scale (float): Standard deviation of principal component weighting. + """ + super().__init__() + self._init(locals()) + self.eigen_vecs = np.array( + [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]] + ) + self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) + + def get_transform(self, image): + assert image.shape[-1] == 3, "RandomLighting only works on RGB images" + weights = np.random.normal(scale=self.scale, size=3) + return BlendTransform( + src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0 + ) + + +def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True): + """ + Generate a CropTransform so that the cropping region contains + the center of the given instance. + Args: + crop_size (tuple): h, w in pixels + image_size (tuple): h, w + instance (dict): an annotation dict of one instance, in Detectron2's + dataset format. + """ + bbox = random.choice(instances) + bbox[::2] = np.clip(bbox[::2],0,image_size[1]) + bbox[1::2] = np.clip(bbox[1::2],0,image_size[0]) + crop_size = np.asarray(crop_size, dtype=np.int32) + center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 + assert ( + image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] + ), "The annotation bounding box is outside of the image!" + assert ( + image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] + ), "Crop size is larger than image size!" 
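+    # The crop's top-left corner (y0, x0) is drawn from [min_yx, max_yx]: the range of
+    # positions whose window both stays inside the image and still contains the chosen
+    # instance's (clipped) center.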
+ min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) + max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) + max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) + y0 = np.random.randint(min_yx[0], max_yx[0] + 1) + x0 = np.random.randint(min_yx[1], max_yx[1] + 1) + if not crop_box: + num_modifications = 0 + modified = True + crop_size = crop_size.astype(np.float32) + while modified: + modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances) + num_modifications += 1 + if num_modifications > 100: + raise ValueError( + "Cannot finished cropping adjustment within 100 tries (#instances {}).".format( + len(instances) + ) + ) + return CropTransform(0, 0, image_size[1], image_size[0]) + if (x0 < 0) or (y0 < 0): + x0 = np.maximum(x0,0) + y0 = np.maximum(y0,0) + return CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0]))) + +def adjust_crop(x0, y0, crop_size, instances, eps=1e-3): + modified = False + x1 = x0 + crop_size[1] + y1 = y0 + crop_size[0] + for bbox in instances: + + if bbox[0] < x0 - eps and bbox[2] > x0 + eps: + crop_size[1] += x0 - bbox[0] + x0 = bbox[0] + modified = True + + if bbox[0] < x1 - eps and bbox[2] > x1 + eps: + crop_size[1] += bbox[2] - x1 + x1 = bbox[2] + modified = True + + if bbox[1] < y0 - eps and bbox[3] > y0 + eps: + crop_size[0] += y0 - bbox[1] + y0 = bbox[1] + modified = True + + if bbox[1] < y1 - eps and bbox[3] > y1 + eps: + crop_size[0] += bbox[3] - y1 + y1 = bbox[3] + modified = True + + return modified, x0, y0, crop_size + + +class RandomCropWithInstance(RandomCrop): + def __init__(self, crop_type, crop_size, crop_instance=False): + """ + Args: + crop_instance (bool): if False, extend cropping boxes to avoid cropping instances + """ + super().__init__(crop_type, crop_size) + self.crop_instance = crop_instance + self.input_args = ("image", "boxes") + def get_transform(self, img, boxes): + image_size = img.shape[:2] + crop_size = self.get_crop_size(image_size) + return gen_crop_transform_with_instance( + crop_size, image_size, boxes, crop_box=self.crop_instance + ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/transform.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..de44b991d7ab0d920ffb769e1402f08e358d37f7 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/data/transforms/transform.py @@ -0,0 +1,351 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +See "Data Augmentation" tutorial for an overview of the system: +https://detectron2.readthedocs.io/tutorials/augmentation.html +""" + +import numpy as np +import torch +import torch.nn.functional as F +from fvcore.transforms.transform import ( + CropTransform, + HFlipTransform, + NoOpTransform, + Transform, + TransformList, +) +from PIL import Image + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + +__all__ = [ + "ExtentTransform", + "ResizeTransform", + "RotationTransform", + "ColorTransform", + "PILColorTransform", +] + + +class ExtentTransform(Transform): + """ + Extracts a subregion from the source image and scales it to the output size. + + The fill color is used to map pixels from the source rect that fall outside + the source image. 
+ + See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform + """ + + def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): + """ + Args: + src_rect (x0, y0, x1, y1): src coordinates + output_size (h, w): dst image size + interp: PIL interpolation methods + fill: Fill color used when src_rect extends outside image + """ + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + h, w = self.output_size + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.transform( + size=(w, h), + method=Image.EXTENT, + data=self.src_rect, + resample=interp if interp else self.interp, + fill=self.fill, + ) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + return ret + + def apply_coords(self, coords): + # Transform image center from source coordinates into output coordinates + # and then map the new origin to the corner of the output image. + h, w = self.output_size + x0, y0, x1, y1 = self.src_rect + new_coords = coords.astype(np.float32) + new_coords[:, 0] -= 0.5 * (x0 + x1) + new_coords[:, 1] -= 0.5 * (y0 + y1) + new_coords[:, 0] *= w / (x1 - x0) + new_coords[:, 1] *= h / (y1 - y0) + new_coords[:, 0] += 0.5 * w + new_coords[:, 1] += 0.5 * h + return new_coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. 
+ """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + interp_method = interp if interp is not None else self.interp + + if img.dtype == np.uint8: + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + else: + # PIL only supports uint8 + if any(x < 0 for x in img.strides): + img = np.ascontiguousarray(img) + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = { + Image.NEAREST: "nearest", + Image.BILINEAR: "bilinear", + Image.BICUBIC: "bicubic", + } + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] + align_corners = None if mode == "nearest" else False + img = F.interpolate( + img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners + ) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + + +class RotationTransform(Transform): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around its center. 
+ """ + + def __init__(self, h, w, angle, expand=True, center=None, interp=None): + """ + Args: + h, w (int): original image size + angle (float): degrees for rotation + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (tuple (width, height)): coordinates of the rotation center + if left to None, the center will be fit to the center of each image + center has no effect if expand=True because it only affects shifting + interp: cv2 interpolation method, default cv2.INTER_LINEAR + """ + super().__init__() + image_center = np.array((w / 2, h / 2)) + if center is None: + center = image_center + if interp is None: + interp = cv2.INTER_LINEAR + abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle)))) + if expand: + # find the new width and height bounds + bound_w, bound_h = np.rint( + [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] + ).astype(int) + else: + bound_w, bound_h = w, h + + self._set_attributes(locals()) + self.rm_coords = self.create_rotation_matrix() + # Needed because of this problem https://github.com/opencv/opencv/issues/11784 + self.rm_image = self.create_rotation_matrix(offset=-0.5) + + def apply_image(self, img, interp=None): + """ + img should be a numpy array, formatted as Height * Width * Nchannels + """ + if len(img) == 0 or self.angle % 360 == 0: + return img + assert img.shape[:2] == (self.h, self.w) + interp = interp if interp is not None else self.interp + return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) + + def apply_coords(self, coords): + """ + coords should be a N * 2 array-like, containing N couples of (x, y) points + """ + coords = np.asarray(coords, dtype=float) + if len(coords) == 0 or self.angle % 360 == 0: + return coords + return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) + return segmentation + + def create_rotation_matrix(self, offset=0): + center = (self.center[0] + offset, self.center[1] + offset) + rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) + if self.expand: + # Find the coordinates of the center of rotation in the new image + # The only point for which we know the future coordinates is the center of the image + rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] + new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center + # shift the rotation center to the new coordinates + rm[:, 2] += new_center + return rm + + def inverse(self): + """ + The inverse is to rotate it back with expand, and crop to get the original shape. + """ + if not self.expand: # Not possible to inverse if a part of the image is lost + raise NotImplementedError() + rotation = RotationTransform( + self.bound_h, self.bound_w, -self.angle, True, None, self.interp + ) + crop = CropTransform( + (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h + ) + return TransformList([rotation, crop]) + + +class ColorTransform(Transform): + """ + Generic wrapper for any photometric transforms. + These transformations should only affect the color space and + not the coordinate space of the image (e.g. 
annotation + coordinates such as bounding boxes should not be changed) + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in an ndarray and returns an ndarray. + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img): + return self.op(img) + + def apply_coords(self, coords): + return coords + + def inverse(self): + return NoOpTransform() + + def apply_segmentation(self, segmentation): + return segmentation + + +class PILColorTransform(ColorTransform): + """ + Generic wrapper for PIL Photometric image transforms, + which affect the color space and not the coordinate + space of the image + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in a PIL Image and returns a transformed + PIL Image. + For reference on possible operations see: + - https://pillow.readthedocs.io/en/stable/ + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__(op) + + def apply_image(self, img): + img = Image.fromarray(img) + return np.asarray(super().apply_image(img)) + + +def HFlip_rotated_box(transform, rotated_boxes): + """ + Apply the horizontal flip transform on rotated boxes. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + # Transform x_center + rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] + # Transform angle + rotated_boxes[:, 4] = -rotated_boxes[:, 4] + return rotated_boxes + + +def Resize_rotated_box(transform, rotated_boxes): + """ + Apply the resizing transform on rotated boxes. For details of how these (approximation) + formulas are derived, please refer to :meth:`RotatedBoxes.scale`. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + scale_factor_x = transform.new_w * 1.0 / transform.w + scale_factor_y = transform.new_h * 1.0 / transform.h + rotated_boxes[:, 0] *= scale_factor_x + rotated_boxes[:, 1] *= scale_factor_y + theta = rotated_boxes[:, 4] * np.pi / 180.0 + c = np.cos(theta) + s = np.sin(theta) + rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) + rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) + rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi + + return rotated_boxes + + +HFlipTransform.register_type("rotated_box", HFlip_rotated_box) +ResizeTransform.register_type("rotated_box", Resize_rotated_box) + +# not necessary any more with latest fvcore +NoOpTransform.register_type("rotated_box", lambda t, x: x) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08a61572b4c7d09c8d400e903a96cbf5b2cc4763 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
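+# A sketch of the common train-script pattern built from the helpers re-exported
+# below (illustrative only; "my_config.yaml" is a hypothetical config file):
+#
+#   from detectron2.config import get_cfg
+#   from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup
+#
+#   args = default_argument_parser().parse_args()
+#   cfg = get_cfg()
+#   cfg.merge_from_file(args.config_file or "my_config.yaml")
+#   cfg.merge_from_list(args.opts or [])
+#   default_setup(cfg, args)
+#   trainer = DefaultTrainer(cfg)
+#   trainer.resume_or_load(resume=args.resume)
+#   trainer.train()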
+ +from .launch import * +from .train_loop import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) +# but still make them available here +from .hooks import * +from .defaults import * diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/defaults.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..57fd52a75c88a3193ae34aa8d420838d88197135 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/defaults.py @@ -0,0 +1,650 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +This file contains components with some default boilerplate logic user may need +in training / testing. They will not work for everyone, but many users may find them useful. + +The behavior of functions/classes in this file is subject to change, +since they are meant to represent the "common default behavior" people need in their projects. +""" + +import argparse +import logging +import os +import sys +from collections import OrderedDict +from typing import Optional +import torch +from fvcore.nn.precise_bn import get_bn_modules +from torch.nn.parallel import DistributedDataParallel + +import detectron2.data.transforms as T +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.data import ( + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.evaluation import ( + DatasetEvaluator, + inference_on_dataset, + print_csv_format, + verify_results, +) +from detectron2.modeling import build_model +from detectron2.solver import build_lr_scheduler, build_optimizer +from detectron2.utils import comm +from detectron2.utils.collect_env import collect_env_info +from detectron2.utils.env import TORCH_VERSION, seed_all_rng +from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import setup_logger + +from . import hooks +from .train_loop import AMPTrainer, SimpleTrainer, TrainerBase + +__all__ = [ + "default_argument_parser", + "default_setup", + "default_writers", + "DefaultPredictor", + "DefaultTrainer", +] + + +def default_argument_parser(epilog=None): + """ + Create a parser with some common arguments used by detectron2 users. + + Args: + epilog (str): epilog passed to ArgumentParser describing the usage. + + Returns: + argparse.ArgumentParser: + """ + parser = argparse.ArgumentParser( + epilog=epilog + or f""" +Examples: + +Run on single machine: + $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml + +Change some config options: + $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 + +Run on multiple machines: + (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] + (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") + parser.add_argument( + "--resume", + action="store_true", + help="Whether to attempt to resume from the checkpoint directory. 
" + "See documentation of `DefaultTrainer.resume_or_load()` for what it means.", + ) + parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") + parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") + parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") + parser.add_argument( + "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" + ) + + # PyTorch still may leave orphan processes in multi-gpu training. + # Therefore we use a deterministic way to obtain port, + # so that users are aware of orphan processes by seeing the port occupied. + port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 + parser.add_argument( + "--dist-url", + default="tcp://127.0.0.1:{}".format(port), + help="initialization URL for pytorch distributed backend. See " + "https://pytorch.org/docs/stable/distributed.html for details.", + ) + parser.add_argument( + "opts", + help="Modify config options by adding 'KEY VALUE' pairs at the end of the command. " + "See config references at " + "https://detectron2.readthedocs.io/modules/config.html#config-references", + default=None, + nargs=argparse.REMAINDER, + ) + return parser + + +def default_setup(cfg, args): + """ + Perform some basic common setups at the beginning of a job, including: + + 1. Set up the detectron2 logger + 2. Log basic information about environment, cmdline arguments, and config + 3. Backup the config to the output directory + + Args: + cfg (CfgNode): the full config to be used + args (argparse.NameSpace): the command line arguments to be logged + """ + output_dir = cfg.OUTPUT_DIR + if comm.is_main_process() and output_dir: + PathManager.mkdirs(output_dir) + + rank = comm.get_rank() + setup_logger(output_dir, distributed_rank=rank, name="fvcore") + logger = setup_logger(output_dir, distributed_rank=rank) + + logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())) + logger.info("Environment info:\n" + collect_env_info()) + + logger.info("Command line arguments: " + str(args)) + if hasattr(args, "config_file") and args.config_file != "": + logger.info( + "Contents of args.config_file={}:\n{}".format( + args.config_file, PathManager.open(args.config_file, "r").read() + ) + ) + + logger.info("Running with full config:\n{}".format(cfg)) + if comm.is_main_process() and output_dir: + # Note: some of our scripts may expect the existence of + # config.yaml in output directory + path = os.path.join(output_dir, "config.yaml") + with PathManager.open(path, "w") as f: + f.write(cfg.dump()) + logger.info("Full config saved to {}".format(path)) + + # make sure each worker has a different, yet deterministic seed if specified + seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank) + + # cudnn benchmark has large overhead. It shouldn't be used considering the small size of + # typical validation set. + if not (hasattr(args, "eval_only") and args.eval_only): + torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK + + +def default_writers(output_dir: str, max_iter: Optional[int] = None): + """ + Build a list of :class:`EventWriter` to be used. + It now consists of a :class:`CommonMetricPrinter`, + :class:`TensorboardXWriter` and :class:`JSONWriter`. + + Args: + output_dir: directory to store JSON metrics and tensorboard events + max_iter: the total number of iterations + + Returns: + list[EventWriter]: a list of :class:`EventWriter` objects. 
+ """ + return [ + # It may not always print what you want to see, since it prints "common" metrics only. + CommonMetricPrinter(max_iter), + JSONWriter(os.path.join(output_dir, "metrics.json")), + TensorboardXWriter(output_dir), + ] + + +class DefaultPredictor: + """ + Create a simple end-to-end predictor with the given config that runs on + single device for a single input image. + + Compared to using the model directly, this class does the following additions: + + 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. + 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. + 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. + 4. Take one input image and produce a single output, instead of a batch. + + If you'd like to do anything more fancy, please refer to its source code + as examples to build and use the model manually. + + Attributes: + metadata (Metadata): the metadata of the underlying dataset, obtained from + cfg.DATASETS.TEST. + + Examples: + :: + pred = DefaultPredictor(cfg) + inputs = cv2.imread("input.jpg") + outputs = pred(inputs) + """ + + def __init__(self, cfg): + self.cfg = cfg.clone() # cfg can be modified by model + self.model = build_model(self.cfg) + self.model.eval() + if len(cfg.DATASETS.TEST): + self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + + checkpointer = DetectionCheckpointer(self.model) + checkpointer.load(cfg.MODEL.WEIGHTS) + + self.aug = T.ResizeShortestEdge( + [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST + ) + + self.input_format = cfg.INPUT.FORMAT + assert self.input_format in ["RGB", "BGR"], self.input_format + + def __call__(self, original_image): + """ + Args: + original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). + + Returns: + predictions (dict): + the output of the model for one image only. + See :doc:`/tutorials/models` for details about the format. + """ + with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 + # Apply pre-processing to image. + if self.input_format == "RGB": + # whether the model expects BGR inputs or RGB + original_image = original_image[:, :, ::-1] + height, width = original_image.shape[:2] + image = self.aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + inputs = {"image": image, "height": height, "width": width} + predictions = self.model([inputs])[0] + return predictions + + +class DefaultTrainer(TrainerBase): + """ + A trainer with default training logic. It does the following: + + 1. Create a :class:`SimpleTrainer` using model, optimizer, dataloader + defined by the given config. Create a LR scheduler defined by the config. + 2. Load the last checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when + `resume_or_load` is called. + 3. Register a few common hooks defined by the config. + + It is created to simplify the **standard model training workflow** and reduce code boilerplate + for users who only need the standard training workflow, with standard features. + It means this class makes *many assumptions* about your training logic that + may easily become invalid in a new research. In fact, any assumptions beyond those made in the + :class:`SimpleTrainer` are too much for research. + + The code of this class has been annotated about restrictive assumptions it makes. + When they do not work for you, you're encouraged to: + + 1. Overwrite methods of this class, OR: + 2. 
Use :class:`SimpleTrainer`, which only does minimal SGD training and + nothing else. You can then add your own hooks if needed. OR: + 3. Write your own training loop similar to `tools/plain_train_net.py`. + + See the :doc:`/tutorials/training` tutorials for more details. + + Note that the behavior of this class, like other functions/classes in + this file, is not stable, since it is meant to represent the "common default behavior". + It is only guaranteed to work well with the standard models and training workflow in detectron2. + To obtain more stable behavior, write your own training logic with other public APIs. + + Examples: + :: + trainer = DefaultTrainer(cfg) + trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS + trainer.train() + + Attributes: + scheduler: + checkpointer (DetectionCheckpointer): + cfg (CfgNode): + """ + + def __init__(self, cfg): + """ + Args: + cfg (CfgNode): + """ + super().__init__() + logger = logging.getLogger("detectron2") + if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 + setup_logger() + cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) + + # Assume these objects must be constructed in this order. + model = self.build_model(cfg) + optimizer = self.build_optimizer(cfg, model) + data_loader = self.build_train_loader(cfg) + + # For training, wrap with DDP. But don't need this for inference. + if comm.get_world_size() > 1: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False,find_unused_parameters=True + ) + self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)( + model, data_loader, optimizer + ) + + self.scheduler = self.build_lr_scheduler(cfg, optimizer) + # Assume no other objects need to be checkpointed. + # We can later make it checkpoint the stateful hooks + self.checkpointer = DetectionCheckpointer( + # Assume you want to save checkpoints together with logs/statistics + model, + cfg.OUTPUT_DIR, + optimizer=optimizer, + scheduler=self.scheduler, + ) + self.start_iter = 0 + self.max_iter = cfg.SOLVER.MAX_ITER + self.cfg = cfg + + self.register_hooks(self.build_hooks()) + + def resume_or_load(self, resume=True): + """ + If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by + a `last_checkpoint` file), resume from the file. Resuming means loading all + available states (eg. optimizer and scheduler) and update iteration counter + from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used. + + Otherwise, this is considered as an independent training. The method will load model + weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start + from iteration 0. + + Args: + resume (bool): whether to do resume or not + """ + checkpoint = self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume) + if resume and self.checkpointer.has_checkpoint(): + self.start_iter = checkpoint.get("iteration", -1) + 1 + # The checkpoint stores the training iteration that just finished, thus we start + # at the next iteration (or iter zero if there's no checkpoint). 
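+            # When resuming under DDP, the checkpoint may only be visible to the first
+            # rank; the block below re-broadcasts parameters/buffers from that rank and
+            # makes every rank adopt the same start iteration, keeping workers in sync.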
+ if isinstance(self.model, DistributedDataParallel): + # broadcast loaded data/model from the first rank, because other + # machines may not have access to the checkpoint file + if TORCH_VERSION >= (1, 7): + self.model._sync_params_and_buffers() + self.start_iter = comm.all_gather(self.start_iter)[0] + + def build_hooks(self): + """ + Build a list of default hooks, including timing, evaluation, + checkpointing, lr scheduling, precise BN, writing events. + + Returns: + list[HookBase]: + """ + cfg = self.cfg.clone() + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN + + ret = [ + hooks.IterationTimer(), + hooks.LRScheduler(), + hooks.PreciseBN( + # Run at the same freq as (but before) evaluation. + cfg.TEST.EVAL_PERIOD, + self.model, + # Build a new data loader to not affect training + self.build_train_loader(cfg), + cfg.TEST.PRECISE_BN.NUM_ITER, + ) + if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) + else None, + ] + + # Do PreciseBN before checkpointer, because it updates the model and need to + # be saved by checkpointer. + # This is not always the best: if checkpointing has a different frequency, + # some checkpoints may have more precise statistics than others. + if comm.is_main_process(): + ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD)) + + def test_and_save_results(): + self._last_eval_results = self.test(self.cfg, self.model) + return self._last_eval_results + + # Do evaluation after checkpointer, because then if it fails, + # we can use the saved checkpoint to debug. + ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) + + if comm.is_main_process(): + # Here the default print/log frequency of each writer is used. + # run writers in the end, so that evaluation metrics are written + ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) + return ret + + def build_writers(self): + """ + Build a list of writers to be used using :func:`default_writers()`. + If you'd like a different list of writers, you can overwrite it in + your trainer. + + Returns: + list[EventWriter]: a list of :class:`EventWriter` objects. + """ + return default_writers(self.cfg.OUTPUT_DIR, self.max_iter) + + def train(self): + """ + Run training. + + Returns: + OrderedDict of results, if evaluation is enabled. Otherwise None. + """ + super().train(self.start_iter, self.max_iter) + if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process(): + assert hasattr( + self, "_last_eval_results" + ), "No evaluation results obtained during training!" + verify_results(self.cfg, self._last_eval_results) + return self._last_eval_results + + def run_step(self): + self._trainer.iter = self.iter + self._trainer.run_step() + + @classmethod + def build_model(cls, cfg): + """ + Returns: + torch.nn.Module: + + It now calls :func:`detectron2.modeling.build_model`. + Overwrite it if you'd like a different model. + """ + model = build_model(cfg) + logger = logging.getLogger(__name__) + logger.info("Model:\n{}".format(model)) + return model + + @classmethod + def build_optimizer(cls, cfg, model): + """ + Returns: + torch.optim.Optimizer: + + It now calls :func:`detectron2.solver.build_optimizer`. + Overwrite it if you'd like a different optimizer. + """ + return build_optimizer(cfg, model) + + @classmethod + def build_lr_scheduler(cls, cfg, optimizer): + """ + It now calls :func:`detectron2.solver.build_lr_scheduler`. + Overwrite it if you'd like a different scheduler. 
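+
+        Example of overriding it in a subclass (a sketch, not this project's setup;
+        a plain cosine schedule stands in for the config-driven one):
+        ::
+            class MyTrainer(DefaultTrainer):
+                @classmethod
+                def build_lr_scheduler(cls, cfg, optimizer):
+                    from torch.optim.lr_scheduler import CosineAnnealingLR
+                    return CosineAnnealingLR(optimizer, T_max=cfg.SOLVER.MAX_ITER)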
+ """ + return build_lr_scheduler(cfg, optimizer) + + @classmethod + def build_train_loader(cls, cfg): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_train_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_train_loader(cfg) + + @classmethod + def build_test_loader(cls, cfg, dataset_name): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_test_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_test_loader(cfg, dataset_name) + + @classmethod + def build_evaluator(cls, cfg, dataset_name): + """ + Returns: + DatasetEvaluator or None + + It is not implemented by default. + """ + raise NotImplementedError( + """ +If you want DefaultTrainer to automatically run evaluation, +please implement `build_evaluator()` in subclasses (see train_net.py for example). +Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example). +""" + ) + + @classmethod + def test(cls, cfg, model, evaluators=None): + """ + Args: + cfg (CfgNode): + model (nn.Module): + evaluators (list[DatasetEvaluator] or None): if None, will call + :meth:`build_evaluator`. Otherwise, must have the same length as + ``cfg.DATASETS.TEST``. + + Returns: + dict: a dict of result metrics + """ + logger = logging.getLogger(__name__) + if isinstance(evaluators, DatasetEvaluator): + evaluators = [evaluators] + if evaluators is not None: + assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( + len(cfg.DATASETS.TEST), len(evaluators) + ) + + results = OrderedDict() + for idx, dataset_name in enumerate(cfg.DATASETS.TEST): + data_loader = cls.build_test_loader(cfg, dataset_name) + # When evaluators are passed in as arguments, + # implicitly assume that evaluators can be created before data_loader. + if evaluators is not None: + evaluator = evaluators[idx] + else: + try: + evaluator = cls.build_evaluator(cfg, dataset_name) + except NotImplementedError: + logger.warn( + "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " + "or implement its `build_evaluator` method." + ) + results[dataset_name] = {} + continue + results_i = inference_on_dataset(model, data_loader, evaluator) + results[dataset_name] = results_i + if comm.is_main_process(): + assert isinstance( + results_i, dict + ), "Evaluator must return a dict on the main process. Got {} instead.".format( + results_i + ) + logger.info("Evaluation results for {} in csv format:".format(dataset_name)) + print_csv_format(results_i) + + if len(results) == 1: + results = list(results.values())[0] + return results + + @staticmethod + def auto_scale_workers(cfg, num_workers: int): + """ + When the config is defined for certain number of workers (according to + ``cfg.SOLVER.REFERENCE_WORLD_SIZE``) that's different from the number of + workers currently in use, returns a new cfg where the total batch size + is scaled so that the per-GPU batch size stays the same as the + original ``IMS_PER_BATCH // REFERENCE_WORLD_SIZE``. + + Other config options are also scaled accordingly: + * training steps and warmup steps are scaled inverse proportionally. + * learning rate are scaled proportionally, following :paper:`ImageNet in 1h`. + + For example, with the original config like the following: + + .. 
code-block:: yaml + + IMS_PER_BATCH: 16 + BASE_LR: 0.1 + REFERENCE_WORLD_SIZE: 8 + MAX_ITER: 5000 + STEPS: (4000,) + CHECKPOINT_PERIOD: 1000 + + When this config is used on 16 GPUs instead of the reference number 8, + calling this method will return a new config with: + + .. code-block:: yaml + + IMS_PER_BATCH: 32 + BASE_LR: 0.2 + REFERENCE_WORLD_SIZE: 16 + MAX_ITER: 2500 + STEPS: (2000,) + CHECKPOINT_PERIOD: 500 + + Note that both the original config and this new config can be trained on 16 GPUs. + It's up to user whether to enable this feature (by setting ``REFERENCE_WORLD_SIZE``). + + Returns: + CfgNode: a new config. Same as original if ``cfg.SOLVER.REFERENCE_WORLD_SIZE==0``. + """ + old_world_size = cfg.SOLVER.REFERENCE_WORLD_SIZE + if old_world_size == 0 or old_world_size == num_workers: + return cfg + cfg = cfg.clone() + frozen = cfg.is_frozen() + cfg.defrost() + + assert ( + cfg.SOLVER.IMS_PER_BATCH % old_world_size == 0 + ), "Invalid REFERENCE_WORLD_SIZE in config!" + scale = num_workers / old_world_size + bs = cfg.SOLVER.IMS_PER_BATCH = int(round(cfg.SOLVER.IMS_PER_BATCH * scale)) + lr = cfg.SOLVER.BASE_LR = cfg.SOLVER.BASE_LR * scale + max_iter = cfg.SOLVER.MAX_ITER = int(round(cfg.SOLVER.MAX_ITER / scale)) + warmup_iter = cfg.SOLVER.WARMUP_ITERS = int(round(cfg.SOLVER.WARMUP_ITERS / scale)) + cfg.SOLVER.STEPS = tuple(int(round(s / scale)) for s in cfg.SOLVER.STEPS) + cfg.TEST.EVAL_PERIOD = int(round(cfg.TEST.EVAL_PERIOD / scale)) + cfg.SOLVER.CHECKPOINT_PERIOD = int(round(cfg.SOLVER.CHECKPOINT_PERIOD / scale)) + cfg.SOLVER.REFERENCE_WORLD_SIZE = num_workers # maintain invariant + logger = logging.getLogger(__name__) + logger.info( + f"Auto-scaling the config to batch_size={bs}, learning_rate={lr}, " + f"max_iter={max_iter}, warmup={warmup_iter}." + ) + + if frozen: + cfg.freeze() + return cfg + + +# Access basic attributes from the underlying trainer +for _attr in ["model", "data_loader", "optimizer"]: + setattr( + DefaultTrainer, + _attr, + property( + # getter + lambda self, x=_attr: getattr(self._trainer, x), + # setter + lambda self, value, x=_attr: setattr(self._trainer, x, value), + ), + ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/hooks.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..56551af7c7e8a07de67c455abffd007574bae5fd --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/hooks.py @@ -0,0 +1,450 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import datetime +import itertools +import logging +import os +import tempfile +import time +from collections import Counter +import torch +from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer +from fvcore.common.param_scheduler import ParamScheduler +from fvcore.common.timer import Timer +from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats + +import detectron2.utils.comm as comm +from detectron2.evaluation.testing import flatten_results_dict +from detectron2.solver import LRMultiplier +from detectron2.utils.events import EventStorage, EventWriter +from detectron2.utils.file_io import PathManager + +from .train_loop import HookBase + +__all__ = [ + "CallbackHook", + "IterationTimer", + "PeriodicWriter", + "PeriodicCheckpointer", + "LRScheduler", + "AutogradProfiler", + "EvalHook", + "PreciseBN", +] + + +""" +Implement some common hooks. 
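+
+Example of a user-defined hook (a minimal sketch; the 100-iteration period is
+arbitrary):
+::
+    class PrintIterHook(HookBase):
+        def after_step(self):
+            if (self.trainer.iter + 1) % 100 == 0:
+                print("finished iteration", self.trainer.iter)
+
+    # trainer.register_hooks([PrintIterHook()])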
+""" + + +class CallbackHook(HookBase): + """ + Create a hook using callback functions provided by the user. + """ + + def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None): + """ + Each argument is a function that takes one argument: the trainer. + """ + self._before_train = before_train + self._before_step = before_step + self._after_step = after_step + self._after_train = after_train + + def before_train(self): + if self._before_train: + self._before_train(self.trainer) + + def after_train(self): + if self._after_train: + self._after_train(self.trainer) + # The functions may be closures that hold reference to the trainer + # Therefore, delete them to avoid circular reference. + del self._before_train, self._after_train + del self._before_step, self._after_step + + def before_step(self): + if self._before_step: + self._before_step(self.trainer) + + def after_step(self): + if self._after_step: + self._after_step(self.trainer) + + +class IterationTimer(HookBase): + """ + Track the time spent for each iteration (each run_step call in the trainer). + Print a summary in the end of training. + + This hook uses the time between the call to its :meth:`before_step` + and :meth:`after_step` methods. + Under the convention that :meth:`before_step` of all hooks should only + take negligible amount of time, the :class:`IterationTimer` hook should be + placed at the beginning of the list of hooks to obtain accurate timing. + """ + + def __init__(self, warmup_iter=3): + """ + Args: + warmup_iter (int): the number of iterations at the beginning to exclude + from timing. + """ + self._warmup_iter = warmup_iter + self._step_timer = Timer() + self._start_time = time.perf_counter() + self._total_timer = Timer() + + def before_train(self): + self._start_time = time.perf_counter() + self._total_timer.reset() + self._total_timer.pause() + + def after_train(self): + logger = logging.getLogger(__name__) + total_time = time.perf_counter() - self._start_time + total_time_minus_hooks = self._total_timer.seconds() + hook_time = total_time - total_time_minus_hooks + + num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter + + if num_iter > 0 and total_time_minus_hooks > 0: + # Speed is meaningful only after warmup + # NOTE this format is parsed by grep in some scripts + logger.info( + "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( + num_iter, + str(datetime.timedelta(seconds=int(total_time_minus_hooks))), + total_time_minus_hooks / num_iter, + ) + ) + + logger.info( + "Total training time: {} ({} on hooks)".format( + str(datetime.timedelta(seconds=int(total_time))), + str(datetime.timedelta(seconds=int(hook_time))), + ) + ) + + def before_step(self): + self._step_timer.reset() + self._total_timer.resume() + + def after_step(self): + # +1 because we're in after_step, the current step is done + # but not yet counted + iter_done = self.trainer.iter - self.trainer.start_iter + 1 + if iter_done >= self._warmup_iter: + sec = self._step_timer.seconds() + self.trainer.storage.put_scalars(time=sec) + else: + self._start_time = time.perf_counter() + self._total_timer.reset() + + self._total_timer.pause() + + +class PeriodicWriter(HookBase): + """ + Write events to EventStorage (by calling ``writer.write()``) periodically. + + It is executed every ``period`` iterations and after the last iteration. + Note that ``period`` does not affect how data is smoothed by each writer. 
+ """ + + def __init__(self, writers, period=20): + """ + Args: + writers (list[EventWriter]): a list of EventWriter objects + period (int): + """ + self._writers = writers + for w in writers: + assert isinstance(w, EventWriter), w + self._period = period + + def after_step(self): + if (self.trainer.iter + 1) % self._period == 0 or ( + self.trainer.iter == self.trainer.max_iter - 1 + ): + for writer in self._writers: + writer.write() + + def after_train(self): + for writer in self._writers: + # If any new data is found (e.g. produced by other after_train), + # write them before closing + writer.write() + writer.close() + + +class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase): + """ + Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook. + + Note that when used as a hook, + it is unable to save additional data other than what's defined + by the given `checkpointer`. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def before_train(self): + self.max_iter = self.trainer.max_iter + + def after_step(self): + # No way to use **kwargs + self.step(self.trainer.iter) + + +class LRScheduler(HookBase): + """ + A hook which executes a torch builtin LR scheduler and summarizes the LR. + It is executed after every iteration. + """ + + def __init__(self, optimizer=None, scheduler=None): + """ + Args: + optimizer (torch.optim.Optimizer): + scheduler (torch.optim.LRScheduler or fvcore.common.param_scheduler.ParamScheduler): + if a :class:`ParamScheduler` object, it defines the multiplier over the base LR + in the optimizer. + + If any argument is not given, will try to obtain it from the trainer. + """ + self._optimizer = optimizer + self._scheduler = scheduler + + def before_train(self): + self._optimizer = self._optimizer or self.trainer.optimizer + self._scheduler = self._scheduler or self.trainer.scheduler + if isinstance(self._scheduler, ParamScheduler): + self._scheduler = LRMultiplier( + self._optimizer, + self._scheduler, + self.trainer.max_iter, + last_iter=self.trainer.iter - 1, + ) + + # NOTE: some heuristics on what LR to summarize + # summarize the param group with most parameters + largest_group = max(len(g["params"]) for g in self._optimizer.param_groups) + + if largest_group == 1: + # If all groups have one parameter, + # then find the most common initial LR, and use it for summary + lr_count = Counter([g["lr"] for g in self._optimizer.param_groups]) + lr = lr_count.most_common()[0][0] + for i, g in enumerate(self._optimizer.param_groups): + if g["lr"] == lr: + self._best_param_group_id = i + break + else: + for i, g in enumerate(self._optimizer.param_groups): + if len(g["params"]) == largest_group: + self._best_param_group_id = i + break + + def after_step(self): + lr = self._optimizer.param_groups[self._best_param_group_id]["lr"] + self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False) + self._scheduler.step() + + +class AutogradProfiler(HookBase): + """ + A hook which runs `torch.autograd.profiler.profile`. + + Examples: + :: + hooks.AutogradProfiler( + lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR + ) + + The above example will run the profiler for iteration 10~20 and dump + results to ``OUTPUT_DIR``. We did not profile the first few iterations + because they are typically slower than the rest. + The result files can be loaded in the ``chrome://tracing`` page in chrome browser. 
+ + Note: + When used together with NCCL on older version of GPUs, + autograd profiler may cause deadlock because it unnecessarily allocates + memory on every device it sees. The memory management calls, if + interleaved with NCCL calls, lead to deadlock on GPUs that do not + support ``cudaLaunchCooperativeKernelMultiDevice``. + """ + + def __init__(self, enable_predicate, output_dir, *, use_cuda=True): + """ + Args: + enable_predicate (callable[trainer -> bool]): a function which takes a trainer, + and returns whether to enable the profiler. + It will be called once every step, and can be used to select which steps to profile. + output_dir (str): the output directory to dump tracing files. + use_cuda (bool): same as in `torch.autograd.profiler.profile`. + """ + self._enable_predicate = enable_predicate + self._use_cuda = use_cuda + self._output_dir = output_dir + + def before_step(self): + if self._enable_predicate(self.trainer): + self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda) + self._profiler.__enter__() + else: + self._profiler = None + + def after_step(self): + if self._profiler is None: + return + self._profiler.__exit__(None, None, None) + PathManager.mkdirs(self._output_dir) + out_file = os.path.join( + self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter) + ) + if "://" not in out_file: + self._profiler.export_chrome_trace(out_file) + else: + # Support non-posix filesystems + with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d: + tmp_file = os.path.join(d, "tmp.json") + self._profiler.export_chrome_trace(tmp_file) + with open(tmp_file) as f: + content = f.read() + with PathManager.open(out_file, "w") as f: + f.write(content) + + +class EvalHook(HookBase): + """ + Run an evaluation function periodically, and at the end of training. + + It is executed every ``eval_period`` iterations and after the last iteration. + """ + + def __init__(self, eval_period, eval_function): + """ + Args: + eval_period (int): the period to run `eval_function`. Set to 0 to + not evaluate periodically (but still after the last iteration). + eval_function (callable): a function which takes no arguments, and + returns a nested dict of evaluation metrics. + + Note: + This hook must be enabled in all or none workers. + If you would like only certain workers to perform evaluation, + give other workers a no-op function (`eval_function=lambda: None`). + """ + self._period = eval_period + self._func = eval_function + + def _do_eval(self): + results = self._func() + + if results: + assert isinstance( + results, dict + ), "Eval function must return a dict. Got {} instead.".format(results) + + flattened_results = flatten_results_dict(results) + for k, v in flattened_results.items(): + try: + v = float(v) + except Exception as e: + raise ValueError( + "[EvalHook] eval_function should return a nested dict of float. " + "Got '{}: {}' instead.".format(k, v) + ) from e + self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False) + + # Evaluation may take different time among workers. + # A barrier make them start the next iteration together. 
+ comm.synchronize() + + def after_step(self): + next_iter = self.trainer.iter + 1 + if self._period > 0 and next_iter % self._period == 0: + self._do_eval() + + def after_train(self): + # This condition is to prevent the eval from running after a failed training + if self.trainer.iter + 1 >= self.trainer.max_iter: + self._do_eval() + # func is likely a closure that holds reference to the trainer + # therefore we clean it to avoid circular reference in the end + del self._func + + +class PreciseBN(HookBase): + """ + The standard implementation of BatchNorm uses EMA in inference, which is + sometimes suboptimal. + This class computes the true average of statistics rather than the moving average, + and put true averages to every BN layer in the given model. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def __init__(self, period, model, data_loader, num_iter): + """ + Args: + period (int): the period this hook is run, or 0 to not run during training. + The hook will always run in the end of training. + model (nn.Module): a module whose all BN layers in training mode will be + updated by precise BN. + Note that user is responsible for ensuring the BN layers to be + updated are in training mode when this hook is triggered. + data_loader (iterable): it will produce data to be run by `model(data)`. + num_iter (int): number of iterations used to compute the precise + statistics. + """ + self._logger = logging.getLogger(__name__) + if len(get_bn_modules(model)) == 0: + self._logger.info( + "PreciseBN is disabled because model does not contain BN layers in training mode." + ) + self._disabled = True + return + + self._model = model + self._data_loader = data_loader + self._num_iter = num_iter + self._period = period + self._disabled = False + + self._data_iter = None + + def after_step(self): + next_iter = self.trainer.iter + 1 + is_final = next_iter == self.trainer.max_iter + if is_final or (self._period > 0 and next_iter % self._period == 0): + self.update_stats() + + def update_stats(self): + """ + Update the model with precise statistics. Users can manually call this method. + """ + if self._disabled: + return + + if self._data_iter is None: + self._data_iter = iter(self._data_loader) + + def data_loader(): + for num_iter in itertools.count(1): + if num_iter % 100 == 0: + self._logger.info( + "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter) + ) + # This way we can reuse the same iterator + yield next(self._data_iter) + + with EventStorage(): # capture events in a new storage to discard them + self._logger.info( + "Running precise-BN for {} iterations... ".format(self._num_iter) + + "Note that this could produce different statistics every time." + ) + update_bn_stats(self._model, data_loader(), self._num_iter) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/launch.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..40dad262dd9929e6e4e9c60424b3fda1ab97318c --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/launch.py @@ -0,0 +1,125 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
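+# Typical usage of `launch` (a sketch; the entry point `main` and the flag values
+# are assumptions, not part of this file):
+#
+#     launch(main, num_gpus_per_machine=8, num_machines=1, machine_rank=0,
+#            dist_url="auto", args=(args,))
+#
+# With a world size of 1 it simply calls main(*args) in the current process.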
+import logging +from datetime import timedelta +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +from detectron2.utils import comm + +__all__ = ["DEFAULT_TIMEOUT", "launch"] + +DEFAULT_TIMEOUT = timedelta(minutes=30) + + +def _find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def launch( + main_func, + num_gpus_per_machine, + num_machines=1, + machine_rank=0, + dist_url=None, + args=(), + timeout=DEFAULT_TIMEOUT, +): + """ + Launch multi-gpu or distributed training. + This function must be called on all machines involved in the training. + It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. + + Args: + main_func: a function that will be called by `main_func(*args)` + num_gpus_per_machine (int): number of GPUs per machine + num_machines (int): the total number of machines + machine_rank (int): the rank of this machine + dist_url (str): url to connect to for distributed jobs, including protocol + e.g. "tcp://127.0.0.1:8686". + Can be set to "auto" to automatically select a free port on localhost + timeout (timedelta): timeout of the distributed workers + args (tuple): arguments passed to main_func + """ + world_size = num_machines * num_gpus_per_machine + if world_size > 1: + # https://github.com/pytorch/pytorch/pull/14391 + # TODO prctl in spawned processes + + if dist_url == "auto": + assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." + port = _find_free_port() + dist_url = f"tcp://127.0.0.1:{port}" + if num_machines > 1 and dist_url.startswith("file://"): + logger = logging.getLogger(__name__) + logger.warning( + "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" + ) + + mp.spawn( + _distributed_worker, + nprocs=num_gpus_per_machine, + args=( + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + args, + timeout, + ), + daemon=False, + ) + else: + main_func(*args) + + +def _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + args, + timeout=DEFAULT_TIMEOUT, +): + assert torch.cuda.is_available(), "cuda is not available. Please check your installation." 
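+    # Example of the rank arithmetic below: with 2 machines of 4 GPUs each,
+    # machine_rank=1 and local_rank=2 give global_rank = 1 * 4 + 2 = 6.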
+ global_rank = machine_rank * num_gpus_per_machine + local_rank + try: + dist.init_process_group( + backend="NCCL", + init_method=dist_url, + world_size=world_size, + rank=global_rank, + timeout=timeout, + ) + except Exception as e: + logger = logging.getLogger(__name__) + logger.error("Process group URL: {}".format(dist_url)) + raise e + # synchronize is needed here to prevent a possible timeout after calling init_process_group + # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 + comm.synchronize() + + assert num_gpus_per_machine <= torch.cuda.device_count() + torch.cuda.set_device(local_rank) + + # Setup the local process group (which contains ranks within the same machine) + assert comm._LOCAL_PROCESS_GROUP is None + num_machines = world_size // num_gpus_per_machine + for i in range(num_machines): + ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) + pg = dist.new_group(ranks_on_i) + if i == machine_rank: + comm._LOCAL_PROCESS_GROUP = pg + + main_func(*args) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/train_loop.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/train_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..25292d70a44143e2f89da552f13631624dae3db4 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/engine/train_loop.py @@ -0,0 +1,343 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +import time +import weakref +from typing import Dict, List, Optional +import torch +from torch.nn.parallel import DataParallel, DistributedDataParallel + +import detectron2.utils.comm as comm +from detectron2.utils.events import EventStorage, get_event_storage +from detectron2.utils.logger import _log_api_usage + +__all__ = ["HookBase", "TrainerBase", "SimpleTrainer", "AMPTrainer"] + + +class HookBase: + """ + Base class for hooks that can be registered with :class:`TrainerBase`. + + Each hook can implement 4 methods. The way they are called is demonstrated + in the following snippet: + :: + hook.before_train() + for iter in range(start_iter, max_iter): + hook.before_step() + trainer.run_step() + hook.after_step() + iter += 1 + hook.after_train() + + Notes: + 1. In the hook method, users can access ``self.trainer`` to access more + properties about the context (e.g., model, current iteration, or config + if using :class:`DefaultTrainer`). + + 2. A hook that does something in :meth:`before_step` can often be + implemented equivalently in :meth:`after_step`. + If the hook takes non-trivial time, it is strongly recommended to + implement the hook in :meth:`after_step` instead of :meth:`before_step`. + The convention is that :meth:`before_step` should only take negligible time. + + Following this convention will allow hooks that do care about the difference + between :meth:`before_step` and :meth:`after_step` (e.g., timer) to + function properly. + + Attributes: + trainer (TrainerBase): A weak reference to the trainer object. Set by the trainer + when the hook is registered. + """ + + def before_train(self): + """ + Called before the first iteration. + """ + pass + + def after_train(self): + """ + Called after the last iteration. + """ + pass + + def before_step(self): + """ + Called before each iteration. + """ + pass + + def after_step(self): + """ + Called after each iteration. + """ + pass + + +class TrainerBase: + """ + Base class for iterative trainer with hooks. 
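+
+    A minimal subclass only has to implement :meth:`run_step`; the loop, hooks and
+    event storage are provided. For example (a sketch):
+    ::
+        class MyTrainer(TrainerBase):
+            def run_step(self):
+                pass  # one optimization step goes here
+
+        # trainer = MyTrainer(); trainer.register_hooks([...]); trainer.train(0, 10)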
+ + The only assumption we made here is: the training runs in a loop. + A subclass can implement what the loop is. + We made no assumptions about the existence of dataloader, optimizer, model, etc. + + Attributes: + iter(int): the current iteration. + + start_iter(int): The iteration to start with. + By convention the minimum possible value is 0. + + max_iter(int): The iteration to end training. + + storage(EventStorage): An EventStorage that's opened during the course of training. + """ + + def __init__(self) -> None: + self._hooks: List[HookBase] = [] + self.iter: int + self.start_iter: int + self.max_iter: int + self.storage: EventStorage + _log_api_usage("trainer." + self.__class__.__name__) + + def register_hooks(self, hooks: List[Optional[HookBase]]) -> None: + """ + Register hooks to the trainer. The hooks are executed in the order + they are registered. + + Args: + hooks (list[Optional[HookBase]]): list of hooks + """ + hooks = [h for h in hooks if h is not None] + for h in hooks: + assert isinstance(h, HookBase) + # To avoid circular reference, hooks and trainer cannot own each other. + # This normally does not matter, but will cause memory leak if the + # involved objects contain __del__: + # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ + h.trainer = weakref.proxy(self) + self._hooks.extend(hooks) + + def train(self, start_iter: int, max_iter: int): + """ + Args: + start_iter, max_iter (int): See docs above + """ + logger = logging.getLogger(__name__) + logger.info("Starting training from iteration {}".format(start_iter)) + + self.iter = self.start_iter = start_iter + self.max_iter = max_iter + + with EventStorage(start_iter) as self.storage: + try: + self.before_train() + for self.iter in range(start_iter, max_iter): + self.before_step() + self.run_step() + self.after_step() + # self.iter == max_iter can be used by `after_train` to + # tell whether the training successfully finished or failed + # due to exceptions. + self.iter += 1 + except Exception: + logger.exception("Exception during training:") + raise + finally: + self.after_train() + + def before_train(self): + for h in self._hooks: + h.before_train() + + def after_train(self): + self.storage.iter = self.iter + for h in self._hooks: + h.after_train() + + def before_step(self): + # Maintain the invariant that storage.iter == trainer.iter + # for the entire execution of each step + self.storage.iter = self.iter + + for h in self._hooks: + h.before_step() + + def after_step(self): + for h in self._hooks: + h.after_step() + + def run_step(self): + raise NotImplementedError + + +class SimpleTrainer(TrainerBase): + """ + A simple trainer for the most common type of task: + single-cost single-optimizer single-data-source iterative optimization, + optionally using data-parallelism. + It assumes that every step, you: + + 1. Compute the loss with a data from the data_loader. + 2. Compute the gradients with the above loss. + 3. Update the model with the optimizer. + + All other tasks during training (checkpointing, logging, evaluation, LR schedule) + are maintained by hooks, which can be registered by :meth:`TrainerBase.register_hooks`. + + If you want to do anything fancier than this, + either subclass TrainerBase and implement your own `run_step`, + or write your own training loop. + """ + + def __init__(self, model, data_loader, optimizer): + """ + Args: + model: a torch Module. Takes a data from data_loader and returns a + dict of losses. + data_loader: an iterable. 
Contains data to be used to call model. + optimizer: a torch optimizer. + """ + super().__init__() + + """ + We set the model to training mode in the trainer. + However it's valid to train a model that's in eval mode. + If you want your model (or a submodule of it) to behave + like evaluation during training, you can overwrite its train() method. + """ + model.train() + + self.model = model + self.data_loader = data_loader + self._data_loader_iter = iter(data_loader) + self.optimizer = optimizer + + def run_step(self): + """ + Implement the standard training logic described above. + """ + assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" + start = time.perf_counter() + """ + If you want to do something with the data, you can wrap the dataloader. + """ + data = next(self._data_loader_iter) + data_time = time.perf_counter() - start + + """ + If you want to do something with the losses, you can wrap the model. + """ + loss_dict = self.model(data) + losses = sum(loss_dict.values()) + + """ + If you need to accumulate gradients or do something similar, you can + wrap the optimizer with your custom `zero_grad()` method. + """ + self.optimizer.zero_grad() + losses.backward() + + self._write_metrics(loss_dict, data_time) + + """ + If you need gradient clipping/scaling or other processing, you can + wrap the optimizer with your custom `step()` method. But it is + suboptimal as explained in https://arxiv.org/abs/2006.15704 Sec 3.2.4 + """ + self.optimizer.step() + + def _write_metrics( + self, + loss_dict: Dict[str, torch.Tensor], + data_time: float, + prefix: str = "", + ): + """ + Args: + loss_dict (dict): dict of scalar losses + data_time (float): time taken by the dataloader iteration + """ + metrics_dict = {k: v.detach().cpu().item() for k, v in loss_dict.items()} + metrics_dict["data_time"] = data_time + + # Gather metrics among all workers for logging + # This assumes we do DDP-style training, which is currently the only + # supported method in detectron2. + all_metrics_dict = comm.gather(metrics_dict) + + if comm.is_main_process(): + storage = get_event_storage() + + # data_time among workers can have high variance. The actual latency + # caused by data_time is the maximum among workers. + data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) + storage.put_scalar("data_time", data_time) + + # average the rest metrics + metrics_dict = { + k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() + } + total_losses_reduced = sum(metrics_dict.values()) + if not np.isfinite(total_losses_reduced): + raise FloatingPointError( + f"Loss became infinite or NaN at iteration={self.iter}!\n" + f"loss_dict = {metrics_dict}" + ) + + storage.put_scalar("{}total_loss".format(prefix), total_losses_reduced) + if len(metrics_dict) > 1: + storage.put_scalars(**metrics_dict) + + +class AMPTrainer(SimpleTrainer): + """ + Like :class:`SimpleTrainer`, but uses PyTorch's native automatic mixed precision + in the training loop. + """ + + def __init__(self, model, data_loader, optimizer, grad_scaler=None): + """ + Args: + model, data_loader, optimizer: same as in :class:`SimpleTrainer`. + grad_scaler: torch GradScaler to automatically scale gradients. + """ + unsupported = "AMPTrainer does not support single-process multi-device training!" 
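+        # AMP training here assumes one device per process: a DistributedDataParallel
+        # wrapper spanning several device_ids, or nn.DataParallel, is rejected below.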
+ if isinstance(model, DistributedDataParallel): + assert not (model.device_ids and len(model.device_ids) > 1), unsupported + assert not isinstance(model, DataParallel), unsupported + + super().__init__(model, data_loader, optimizer) + + if grad_scaler is None: + from torch.cuda.amp import GradScaler + + grad_scaler = GradScaler() + self.grad_scaler = grad_scaler + + def run_step(self): + """ + Implement the AMP training logic. + """ + assert self.model.training, "[AMPTrainer] model was changed to eval mode!" + assert torch.cuda.is_available(), "[AMPTrainer] CUDA is required for AMP training!" + from torch.cuda.amp import autocast + + start = time.perf_counter() + data = next(self._data_loader_iter) + data_time = time.perf_counter() - start + + with autocast(): + loss_dict = self.model(data) + losses = sum(loss_dict.values()) + + self.optimizer.zero_grad() + self.grad_scaler.scale(losses).backward() + + self._write_metrics(loss_dict, data_time) + + self.grad_scaler.step(self.optimizer) + self.grad_scaler.update() diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a059c9cb3eb66f5e107721e30b5c9eda5122ec5 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator +from .coco_evaluation import COCOEvaluator +from .rotated_coco_evaluation import RotatedCOCOEvaluator +from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset +from .lvis_evaluation import LVISEvaluator +from .panoptic_evaluation import COCOPanopticEvaluator +from .pascal_voc_evaluation import PascalVOCDetectionEvaluator +from .sem_seg_evaluation import SemSegEvaluator +from .testing import print_csv_format, verify_results +from .text_evaluation import TextEvaluator +from .text_eval_script import text_eval_main +from . import rrc_evaluation_funcs + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/cityscapes_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/cityscapes_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..3fb6c4cd5f752d639570d022cb23ce18491c370a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/cityscapes_evaluation.py @@ -0,0 +1,194 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import glob +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +import torch +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class CityscapesEvaluator(DatasetEvaluator): + """ + Base class for evaluation using cityscapes API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): the name of the dataset. + It must have the following metadata associated with it: + "thing_classes", "gt_dir". 
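+
+        Example (a sketch; the registered dataset name is an assumption):
+        ::
+            evaluator = CityscapesInstanceEvaluator("cityscapes_fine_instance_seg_val")
+            # results = inference_on_dataset(model, val_loader, evaluator)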
+ """ + self._metadata = MetadataCatalog.get(dataset_name) + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") + self._temp_dir = self._working_dir.name + # All workers will write to the same results directory + # TODO this does not work in distributed training + self._temp_dir = comm.all_gather(self._temp_dir)[0] + if self._temp_dir != self._working_dir.name: + self._working_dir.cleanup() + self._logger.info( + "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) + ) + + +class CityscapesInstanceEvaluator(CityscapesEvaluator): + """ + Evaluate instance segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import name2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") + + if "instances" in output: + output = output["instances"].to(self._cpu_device) + num_instances = len(output) + with open(pred_txt, "w") as fout: + for i in range(num_instances): + pred_class = output.pred_classes[i] + classes = self._metadata.thing_classes[pred_class] + class_id = name2label[classes].id + score = output.scores[i] + mask = output.pred_masks[i].numpy().astype("uint8") + png_filename = os.path.join( + self._temp_dir, basename + "_{}_{}.png".format(i, classes) + ) + + Image.fromarray(mask * 255).save(png_filename) + fout.write( + "{} {} {}\n".format(os.path.basename(png_filename), class_id, score) + ) + else: + # Cityscapes requires a prediction file for every ground truth image. + with open(pred_txt, "w") as fout: + pass + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP" and "AP50". + """ + comm.synchronize() + if comm.get_rank() > 0: + return + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + )["averages"] + + ret = OrderedDict() + ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} + self._working_dir.cleanup() + return ret + + +class CityscapesSemSegEvaluator(CityscapesEvaluator): + """ + Evaluate semantic segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import trainId2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") + + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() + pred = 255 * np.ones(output.shape, dtype=np.uint8) + for train_id, label in trainId2label.items(): + if label.ignoreInEval: + continue + pred[output == train_id] = label.id + Image.fromarray(pred).save(pred_filename) + + def evaluate(self): + comm.synchronize() + if comm.get_rank() > 0: + return + # Load the Cityscapes eval script *after* setting the required env var, + # since the script reads CITYSCAPES_DATASET into global variables at load time. + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + ) + ret = OrderedDict() + ret["sem_seg"] = { + "IoU": 100.0 * results["averageScoreClasses"], + "iIoU": 100.0 * results["averageScoreInstClasses"], + "IoU_sup": 100.0 * results["averageScoreCategories"], + "iIoU_sup": 100.0 * results["averageScoreInstCategories"], + } + self._working_dir.cleanup() + return ret diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/coco_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..e2131d7475a8649a86df2112a13ee6d187089161 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/coco_evaluation.py @@ -0,0 +1,579 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import pycocotools.mask as mask_util +import torch +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import detectron2.utils.comm as comm +from detectron2.config import CfgNode +from detectron2.data import MetadataCatalog +from detectron2.data.datasets.coco import convert_to_coco_json +from detectron2.evaluation.fast_eval_api import COCOeval_opt +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import create_small_table + +from .evaluator import DatasetEvaluator + + +class COCOEvaluator(DatasetEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def __init__( + self, + dataset_name, + tasks=None, + distributed=True, + output_dir=None, + *, + use_fast_impl=True, + kpt_oks_sigmas=(), + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have either the following corresponding metadata: + + "json_file": the path to the COCO format annotation + + Or it must be in detectron2's standard dataset format + so it can be converted to COCO format automatically. + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm", "keypoints". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks and run evaluation + in the main process. + Otherwise, will only evaluate the results in the current process. + output_dir (str): optional, an output directory to dump all + results predicted on the dataset. The dump contains two files: + + 1. 
"instances_predictions.pth" a file that can be loaded with `torch.load` and + contains all the results in the format they are produced by the model. + 2. "coco_instances_results.json" a json file in COCO's result format. + use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. + Although the results should be very close to the official implementation in COCO + API, it is still recommended to compute results with the official API for use in + papers. The faster implementation also uses more RAM. + kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. + See http://cocodataset.org/#keypoints-eval + When empty, it will use the defaults in COCO. + Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. + """ + self._logger = logging.getLogger(__name__) + self._distributed = distributed + self._output_dir = output_dir + self._use_fast_impl = use_fast_impl + + if tasks is not None and isinstance(tasks, CfgNode): + kpt_oks_sigmas = ( + tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas + ) + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + self._logger.info( + f"'{dataset_name}' is not registered by `register_coco_instances`." + " Therefore trying to convert it to COCO format ..." + ) + + cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") + self._metadata.json_file = cache_path + convert_to_coco_json(dataset_name, cache_path) + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + # Test set json files do not contain annotations (evaluation must be + # performed using the COCO evaluation server). + self._do_evaluation = "annotations" in self._coco_api.dataset + if self._do_evaluation: + self._kpt_oks_sigmas = kpt_oks_sigmas + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + if len(prediction) > 1: + self._predictions.append(prediction) + + def evaluate(self, img_ids=None): + """ + Args: + img_ids: a list of image IDs to evaluate on. 
Default to None for the whole dataset + """ + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return {} + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(predictions, img_ids=img_ids) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + """ + Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. + """ + tasks = {"bbox"} + for pred in predictions: + if "segmentation" in pred: + tasks.add("segm") + if "keypoints" in pred: + tasks.add("keypoints") + return sorted(tasks) + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + num_classes = len(all_contiguous_ids) + assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + assert category_id < num_classes, ( + f"A prediction has class={category_id}, " + f"but the dataset only has {num_classes} classes and " + f"predicted class id should be in [0, {num_classes - 1}]." + ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. 
+ """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. + bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + def _derive_coco_results(self, coco_eval, iou_type, class_names=None): + """ + Derive the desired score numbers from summarized COCOeval. + + Args: + coco_eval (None or COCOEval): None represents no predictions from model. + iou_type (str): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], + }[iou_type] + + if coco_eval is None: + self._logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + # the standard metrics + results = { + metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") + for idx, metric in enumerate(metrics) + } + self._logger.info( + "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) + ) + if not np.isfinite(sum(results.values())): + self._logger.info("Some metrics cannot be computed and is shown as NaN.") + + if class_names is None or len(class_names) <= 1: + return results + # Compute per-category AP + # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa + precisions = coco_eval.eval["precision"] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) == precisions.shape[2] + + results_per_category = [] + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + results_per_category.append(("{}".format(name), float(ap * 100))) + + # tabulate it + N_COLS = min(6, len(results_per_category) * 2) + results_flatten = list(itertools.chain(*results_per_category)) + results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + results_2d, + tablefmt="pipe", + floatfmt=".3f", + headers=["category", "AP"] * (N_COLS // 2), + numalign="left", + ) + 
self._logger.info("Per-category {} AP: \n".format(iou_type) + table) + + results.update({"AP-" + name: ap for name, ap in results_per_category}) + return results + + +def instances_to_coco_json(instances, img_id): + """ + Dump an "Instances" object to a COCO-format json that's used for evaluation. + + Args: + instances (Instances): + img_id (int): the image id + + Returns: + list[dict]: list of json annotations in COCO format. + """ + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + has_mask = instances.has("pred_masks") + if has_mask: + # use RLE to encode the masks, because they are too large and takes memory + # since this evaluator stores outputs of the entire dataset + rles = [ + mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in instances.pred_masks + ] + for rle in rles: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which always produces strings cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the pycocotools/_mask.pyx does). + rle["counts"] = rle["counts"].decode("utf-8") + + has_keypoints = instances.has("pred_keypoints") + if has_keypoints: + keypoints = instances.pred_keypoints + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + if has_mask: + result["segmentation"] = rles[k] + if has_keypoints: + # In COCO annotations, + # keypoints coordinates are pixel indices. + # However our predictions are floating point coordinates. + # Therefore we subtract 0.5 to be consistent with the annotation format. + # This is the inverse of data loading logic in `datasets/coco.py`. + keypoints[k][:, :2] -= 0.5 + result["keypoints"] = keypoints[k].flatten().tolist() + results.append(result) + return results + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. 
+ """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) + anno = coco_api.loadAnns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + for obj in anno + if obj["iscrowd"] == 0 + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_coco( + coco_gt, coco_results, iou_type, kpt_oks_sigmas=None, use_fast_impl=True, img_ids=None +): + """ + Evaluate the coco results using COCOEval API. 
+ """ + assert len(coco_results) > 0 + + if iou_type == "segm": + coco_results = copy.deepcopy(coco_results) + # When evaluating mask AP, if the results contain bbox, cocoapi will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in coco_results: + c.pop("bbox", None) + + coco_dt = coco_gt.loadRes(coco_results) + coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type) + if img_ids is not None: + coco_eval.params.imgIds = img_ids + + if iou_type == "keypoints": + # Use the COCO default keypoint OKS sigmas unless overrides are specified + if kpt_oks_sigmas: + assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!" + coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) + # COCOAPI requires every detection and every gt to have keypoints, so + # we just take the first entry from both + num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 + num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 + num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) + assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( + f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " + f"Ground truth contains {num_keypoints_gt} keypoints. " + f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " + "They have to agree with each other. For meaning of OKS, please refer to " + "http://cocodataset.org/#keypoints-eval." + ) + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/evaluator.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..07da68163e214d7fefac95868d95a91c953f8f37 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/evaluator.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import datetime +import logging +import time +from collections import OrderedDict +from contextlib import ExitStack, contextmanager +import torch +from torch import nn + +from detectron2.utils.comm import get_world_size, is_main_process +from detectron2.utils.logger import log_every_n_seconds + + +class DatasetEvaluator: + """ + Base class for a dataset evaluator. + + The function :func:`inference_on_dataset` runs the model over + all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. + + This class will accumulate information of the inputs/outputs (by :meth:`process`), + and produce evaluation results in the end (by :meth:`evaluate`). + """ + + def reset(self): + """ + Preparation for a new round of evaluation. + Should be called before starting a round of evaluation. + """ + pass + + def process(self, inputs, outputs): + """ + Process the pair of inputs and outputs. + If they contain batches, the pairs can be consumed one-by-one using `zip`: + + .. code-block:: python + + for input_, output in zip(inputs, outputs): + # do evaluation on single input/output pair + ... + + Args: + inputs (list): the inputs that's used to call the model. + outputs (list): the return value of `model(inputs)` + """ + pass + + def evaluate(self): + """ + Evaluate/summarize the performance, after processing all input/output pairs. 
+ + Returns: + dict: + A new evaluator class can return a dict of arbitrary format + as long as the user can process the results. + In our train_net.py, we expect the following format: + + * key: the name of the task (e.g., bbox) + * value: a dict of {metric name: score}, e.g.: {"AP50": 80} + """ + pass + + +class DatasetEvaluators(DatasetEvaluator): + """ + Wrapper class to combine multiple :class:`DatasetEvaluator` instances. + + This class dispatches every evaluation call to + all of its :class:`DatasetEvaluator`. + """ + + def __init__(self, evaluators): + """ + Args: + evaluators (list): the evaluators to combine. + """ + super().__init__() + self._evaluators = evaluators + + def reset(self): + for evaluator in self._evaluators: + evaluator.reset() + + def process(self, inputs, outputs): + for evaluator in self._evaluators: + evaluator.process(inputs, outputs) + + def evaluate(self): + results = OrderedDict() + for evaluator in self._evaluators: + result = evaluator.evaluate() + if is_main_process() and result is not None: + for k, v in result.items(): + assert ( + k not in results + ), "Different evaluators produce results with the same key {}".format(k) + results[k] = v + return results + + +def inference_on_dataset(model, data_loader, evaluator): + """ + Run model on the data_loader and evaluate the metrics with evaluator. + Also benchmark the inference speed of `model.__call__` accurately. + The model will be used in eval mode. + + Args: + model (callable): a callable which takes an object from + `data_loader` and returns some outputs. + + If it's an nn.Module, it will be temporarily set to `eval` mode. + If you wish to evaluate a model in `training` mode instead, you can + wrap the given model and override its behavior of `.eval()` and `.train()`. + data_loader: an iterable object with a length. + The elements it generates will be the inputs to the model. + evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want + to benchmark, but don't want to do any evaluation. + + Returns: + The return value of `evaluator.evaluate()` + """ + num_devices = get_world_size() + logger = logging.getLogger(__name__) + logger.info("Start inference on {} images".format(len(data_loader))) + + total = len(data_loader) # inference data loader must have a fixed length + if evaluator is None: + # create a no-op evaluator + evaluator = DatasetEvaluators([]) + evaluator.reset() + + num_warmup = min(5, total - 1) + start_time = time.perf_counter() + total_compute_time = 0 + with ExitStack() as stack: + if isinstance(model, nn.Module): + stack.enter_context(inference_context(model)) + stack.enter_context(torch.no_grad()) + + for idx, inputs in enumerate(data_loader): + if idx == num_warmup: + start_time = time.perf_counter() + total_compute_time = 0 + + start_compute_time = time.perf_counter() + outputs = model(inputs) + if torch.cuda.is_available(): + torch.cuda.synchronize() + total_compute_time += time.perf_counter() - start_compute_time + evaluator.process(inputs, outputs) + iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) + seconds_per_img = total_compute_time / iters_after_start + if idx >= num_warmup * 2 or seconds_per_img > 5: + total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start + eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) + log_every_n_seconds( + logging.INFO, + "Inference done {}/{}. {:.4f} s / img. 
ETA={}".format( + idx + 1, total, seconds_per_img, str(eta) + ), + n=5, + ) + + # Measure the time only for this worker (before the synchronization barrier) + total_time = time.perf_counter() - start_time + total_time_str = str(datetime.timedelta(seconds=total_time)) + # NOTE this format is parsed by grep + logger.info( + "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( + total_time_str, total_time / (total - num_warmup), num_devices + ) + ) + total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) + logger.info( + "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( + total_compute_time_str, total_compute_time / (total - num_warmup), num_devices + ) + ) + results = evaluator.evaluate() + # An evaluator may return None when not in main process. + # Replace it by an empty dict instead to make it easier for downstream code to handle + if results is None: + results = {} + return results + + +@contextmanager +def inference_context(model): + """ + A context where the model is temporarily changed to eval mode, + and restored to previous mode afterwards. + + Args: + model: a torch Module + """ + training_mode = model.training + model.eval() + yield + model.train(training_mode) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/fast_eval_api.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/fast_eval_api.py new file mode 100644 index 0000000000000000000000000000000000000000..2eb202bd5efa3ec3d366027b1debffc269ae8b17 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/fast_eval_api.py @@ -0,0 +1,121 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import numpy as np +import time +from pycocotools.cocoeval import COCOeval + +from detectron2 import _C + +logger = logging.getLogger(__name__) + + +class COCOeval_opt(COCOeval): + """ + This is a slightly modified version of the original COCO API, where the functions evaluateImg() + and accumulate() are implemented in C++ to speedup evaluation + """ + + def evaluate(self): + """ + Run per image evaluation on given images and store results in self.evalImgs_cpp, a + datastructure that isn't readable from Python but is used by a c++ implementation of + accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure + self.evalImgs because this datastructure is a computational bottleneck. 
+ :return: None + """ + tic = time.time() + + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = "segm" if p.useSegm == 1 else "bbox" + logger.info("Evaluate annotation type *{}*".format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() # bottleneck + + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == "segm" or p.iouType == "bbox": + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds + } # bottleneck + + maxDet = p.maxDets[-1] + + # <<<< Beginning of code differences with original COCO API + def convert_instances_to_cpp(instances, is_det=False): + # Convert annotations for a list of instances in an image to a format that's fast + # to access in C++ + instances_cpp = [] + for instance in instances: + instance_cpp = _C.InstanceAnnotation( + int(instance["id"]), + instance["score"] if is_det else instance.get("score", 0.0), + instance["area"], + bool(instance.get("iscrowd", 0)), + bool(instance.get("ignore", 0)), + ) + instances_cpp.append(instance_cpp) + return instances_cpp + + # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ + ground_truth_instances = [ + [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] + for imgId in p.imgIds + ] + detected_instances = [ + [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] + for imgId in p.imgIds + ] + ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] + + if not p.useCats: + # For each image, flatten per-category lists into a single list + ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances] + detected_instances = [[[o for c in i for o in c]] for i in detected_instances] + + # Call C++ implementation of self.evaluateImgs() + self._evalImgs_cpp = _C.COCOevalEvaluateImages( + p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances + ) + self._evalImgs = None + + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + logger.info("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) + # >>>> End of code differences with original COCO API + + def accumulate(self): + """ + Accumulate per image evaluation results and store the result in self.eval. Does not + support changing parameter settings from those used by self.evaluate() + """ + logger.info("Accumulating evaluation results...") + tic = time.time() + assert hasattr( + self, "_evalImgs_cpp" + ), "evaluate() must be called before accmulate() is called." 
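+ # _C.COCOevalAccumulate returns flat buffers together with a "counts" shape
+ # descriptor; the reshapes below restore the ndarray layout that
+ # COCOeval.summarize() and COCOEvaluator._derive_coco_results() read from self.eval.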
+ + self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) + + # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections + self.eval["recall"] = np.array(self.eval["recall"]).reshape( + self.eval["counts"][:1] + self.eval["counts"][2:] + ) + + # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X + # num_area_ranges X num_max_detections + self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"]) + self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) + toc = time.time() + logger.info("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic)) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/lvis_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/lvis_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..81f08e40cf61f0c451e63565debac7f6877b99d9 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/lvis_evaluation.py @@ -0,0 +1,358 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import json +import logging +import os +import pickle +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.config import CfgNode +from detectron2.data import MetadataCatalog +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import create_small_table + +from .coco_evaluation import instances_to_coco_json +from .evaluator import DatasetEvaluator + + +class LVISEvaluator(DatasetEvaluator): + """ + Evaluate object proposal and instance detection/segmentation outputs using + LVIS's metrics and evaluation API. + """ + + def __init__(self, dataset_name, tasks=None, distributed=True, output_dir=None): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have the following corresponding metadata: + "json_file": the path to the LVIS format annotation + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + output_dir (str): optional, an output directory to dump results. + """ + from lvis import LVIS + + self._logger = logging.getLogger(__name__) + + if tasks is not None and isinstance(tasks, CfgNode): + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + json_file = PathManager.get_local_path(self._metadata.json_file) + self._lvis_api = LVIS(json_file) + # Test set json files do not contain annotations (evaluation must be + # performed using the LVIS evaluation server). + self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0 + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). + It is a list of dict. 
Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a LVIS model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def evaluate(self): + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(predictions) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + for pred in predictions: + if "segmentation" in pred: + return ("bbox", "segm") + return ("bbox",) + + def _eval_predictions(self, predictions): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + + Args: + predictions (list[dict]): list of outputs from the model + """ + self._logger.info("Preparing results in the LVIS format ...") + lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(lvis_results) + + # LVIS evaluator can be used to evaluate results for COCO dataset categories. + # In this case `_metadata` variable will have a field with COCO-specific category mapping. + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in lvis_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + else: + # unmap the category ids for LVIS (from 0-indexed to 1-indexed) + for result in lvis_results: + result["category_id"] += 1 + + if self._output_dir: + file_path = os.path.join(self._output_dir, "lvis_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(lvis_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + for task in sorted(tasks): + res = _evaluate_predictions_on_lvis( + self._lvis_api, lvis_results, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. 
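+ # The dict pickled below ("boxes", "objectness_logits", "ids", "bbox_mode") is written
+ # only for offline inspection; this evaluator never reads it back.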
+ bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official LVIS API recall evaluation code. However, + it produces slightly different results. + """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) + anno = lvis_api.load_anns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + 
max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None): + """ + Args: + iou_type (str): + kpt_oks_sigmas (list[float]): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + }[iou_type] + + logger = logging.getLogger(__name__) + + if len(lvis_results) == 0: # TODO: check if needed + logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + if iou_type == "segm": + lvis_results = copy.deepcopy(lvis_results) + # When evaluating mask AP, if the results contain bbox, LVIS API will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in lvis_results: + c.pop("bbox", None) + + from lvis import LVISEval, LVISResults + + lvis_results = LVISResults(lvis_gt, lvis_results) + lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) + lvis_eval.run() + lvis_eval.print_results() + + # Pull the standard metrics from the LVIS results + results = lvis_eval.get_results() + results = {metric: float(results[metric] * 100) for metric in metrics} + logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) + return results diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/panoptic_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/panoptic_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb3462b7f9abf6feaa499976bfed526ebd17e31 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/panoptic_evaluation.py @@ -0,0 +1,199 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
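+"""
+COCO panoptic segmentation evaluation (PQ / SQ / RQ), backed by panopticapi's pq_compute.
+
+Minimal usage sketch (illustrative only; it assumes a registered panoptic dataset whose
+metadata provides ``panoptic_json``, ``panoptic_root`` and the thing/stuff id mappings,
+plus a ``model`` and ``val_loader`` built elsewhere)::
+
+    from detectron2.evaluation import COCOPanopticEvaluator, inference_on_dataset
+
+    evaluator = COCOPanopticEvaluator("coco_2017_val_panoptic_separated", output_dir="./output")
+    results = inference_on_dataset(model, val_loader, evaluator)
+    print(results["panoptic_seg"]["PQ"])
+"""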
+import contextlib +import io +import itertools +import json +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +from typing import Optional +from PIL import Image +from tabulate import tabulate + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + +logger = logging.getLogger(__name__) + + +class COCOPanopticEvaluator(DatasetEvaluator): + """ + Evaluate Panoptic Quality metrics on COCO using PanopticAPI. + It saves panoptic segmentation prediction in `output_dir` + + It contains a synchronize call and has to be called from all workers. + """ + + def __init__(self, dataset_name: str, output_dir: Optional[str] = None): + """ + Args: + dataset_name: name of the dataset + output_dir: output directory to save results for evaluation. + """ + self._metadata = MetadataCatalog.get(dataset_name) + self._thing_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + self._stuff_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items() + } + + self._output_dir = output_dir + if self._output_dir is not None: + PathManager.mkdirs(self._output_dir) + + def reset(self): + self._predictions = [] + + def _convert_category_id(self, segment_info): + isthing = segment_info.pop("isthing", None) + if isthing is None: + # the model produces panoptic category id directly. No more conversion needed + return segment_info + if isthing is True: + segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + return segment_info + + def process(self, inputs, outputs): + from panopticapi.utils import id2rgb + + for input, output in zip(inputs, outputs): + panoptic_img, segments_info = output["panoptic_seg"] + panoptic_img = panoptic_img.cpu().numpy() + if segments_info is None: + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label, and add 1 to panoptic_img since the official + # evaluation script uses 0 for VOID label. + label_divisor = self._metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_img): + if panoptic_label == -1: + # VOID region. + continue + pred_class = panoptic_label // label_divisor + isthing = ( + pred_class in self._metadata.thing_dataset_id_to_contiguous_id.values() + ) + segments_info.append( + { + "id": int(panoptic_label) + 1, + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + # Official evaluation script uses 0 for VOID label. 
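+ # After this shift, VOID (-1) maps to 0 and predicted segment ids start at 1,
+ # matching the "id" values recorded in segments_info above.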
+ panoptic_img += 1 + + file_name = os.path.basename(input["file_name"]) + file_name_png = os.path.splitext(file_name)[0] + ".png" + with io.BytesIO() as out: + Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG") + segments_info = [self._convert_category_id(x) for x in segments_info] + self._predictions.append( + { + "image_id": input["image_id"], + "file_name": file_name_png, + "png_string": out.getvalue(), + "segments_info": segments_info, + } + ) + + def evaluate(self): + comm.synchronize() + + self._predictions = comm.gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not comm.is_main_process(): + return + + # PanopticApi requires local files + gt_json = PathManager.get_local_path(self._metadata.panoptic_json) + gt_folder = PathManager.get_local_path(self._metadata.panoptic_root) + + with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: + logger.info("Writing all panoptic predictions to {} ...".format(pred_dir)) + for p in self._predictions: + with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: + f.write(p.pop("png_string")) + + with open(gt_json, "r") as f: + json_data = json.load(f) + json_data["annotations"] = self._predictions + + output_dir = self._output_dir or pred_dir + predictions_json = os.path.join(output_dir, "predictions.json") + with PathManager.open(predictions_json, "w") as f: + f.write(json.dumps(json_data)) + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + gt_json, + PathManager.get_local_path(predictions_json), + gt_folder=gt_folder, + pred_folder=pred_dir, + ) + + res = {} + res["PQ"] = 100 * pq_res["All"]["pq"] + res["SQ"] = 100 * pq_res["All"]["sq"] + res["RQ"] = 100 * pq_res["All"]["rq"] + res["PQ_th"] = 100 * pq_res["Things"]["pq"] + res["SQ_th"] = 100 * pq_res["Things"]["sq"] + res["RQ_th"] = 100 * pq_res["Things"]["rq"] + res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] + res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] + res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] + + results = OrderedDict({"panoptic_seg": res}) + _print_panoptic_results(pq_res) + + return results + + +def _print_panoptic_results(pq_res): + headers = ["", "PQ", "SQ", "RQ", "#categories"] + data = [] + for name in ["All", "Things", "Stuff"]: + row = [name] + [pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]] + data.append(row) + table = tabulate( + data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center" + ) + logger.info("Panoptic Evaluation Results:\n" + table) + + +if __name__ == "__main__": + from detectron2.utils.logger import setup_logger + + logger = setup_logger() + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--gt-json") + parser.add_argument("--gt-dir") + parser.add_argument("--pred-json") + parser.add_argument("--pred-dir") + args = parser.parse_args() + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir + ) + _print_panoptic_results(pq_res) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/pascal_voc_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/pascal_voc_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..1d1abcde2f87bb5f103e73cb364aaabbecb6e619 --- /dev/null +++ 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/pascal_voc_evaluation.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +import os +import tempfile +import xml.etree.ElementTree as ET +from collections import OrderedDict, defaultdict +from functools import lru_cache +import torch + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class PascalVOCDetectionEvaluator(DatasetEvaluator): + """ + Evaluate Pascal VOC style AP for Pascal VOC dataset. + It contains a synchronization, therefore has to be called from all ranks. + + Note that the concept of AP can be implemented in different ways and may not + produce identical results. This class mimics the implementation of the official + Pascal VOC Matlab API, and should produce similar but not identical results to the + official API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): name of the dataset, e.g., "voc_2007_test" + """ + self._dataset_name = dataset_name + meta = MetadataCatalog.get(dataset_name) + + # Too many tiny files, download all to local for speed. + annotation_dir_local = PathManager.get_local_path( + os.path.join(meta.dirname, "Annotations/") + ) + self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml") + self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") + self._class_names = meta.thing_classes + assert meta.year in [2007, 2012], meta.year + self._is_2007 = meta.year == 2007 + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._predictions = defaultdict(list) # class name -> list of prediction strings + + def process(self, inputs, outputs): + for input, output in zip(inputs, outputs): + image_id = input["image_id"] + instances = output["instances"].to(self._cpu_device) + boxes = instances.pred_boxes.tensor.numpy() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + for box, score, cls in zip(boxes, scores, classes): + xmin, ymin, xmax, ymax = box + # The inverse of data loading logic in `datasets/pascal_voc.py` + xmin += 1 + ymin += 1 + self._predictions[cls].append( + f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" + ) + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". + """ + all_predictions = comm.gather(self._predictions, dst=0) + if not comm.is_main_process(): + return + predictions = defaultdict(list) + for predictions_per_rank in all_predictions: + for clsid, lines in predictions_per_rank.items(): + predictions[clsid].extend(lines) + del all_predictions + + self._logger.info( + "Evaluating {} using {} metric. 
" + "Note that results do not use the official Matlab API.".format( + self._dataset_name, 2007 if self._is_2007 else 2012 + ) + ) + + with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: + res_file_template = os.path.join(dirname, "{}.txt") + + aps = defaultdict(list) # iou -> ap per class + for cls_id, cls_name in enumerate(self._class_names): + lines = predictions.get(cls_id, [""]) + + with open(res_file_template.format(cls_name), "w") as f: + f.write("\n".join(lines)) + + for thresh in range(50, 100, 5): + rec, prec, ap = voc_eval( + res_file_template, + self._anno_file_template, + self._image_set_path, + cls_name, + ovthresh=thresh / 100.0, + use_07_metric=self._is_2007, + ) + aps[thresh].append(ap * 100) + + ret = OrderedDict() + mAP = {iou: np.mean(x) for iou, x in aps.items()} + ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} + return ret + + +############################################################################## +# +# Below code is modified from +# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py +# -------------------------------------------------------- +# Fast/er R-CNN +# Licensed under The MIT License [see LICENSE for details] +# Written by Bharath Hariharan +# -------------------------------------------------------- + +"""Python implementation of the PASCAL VOC devkit's AP evaluation code.""" + + +@lru_cache(maxsize=None) +def parse_rec(filename): + """Parse a PASCAL VOC xml file.""" + with PathManager.open(filename) as f: + tree = ET.parse(f) + objects = [] + for obj in tree.findall("object"): + obj_struct = {} + obj_struct["name"] = obj.find("name").text + obj_struct["pose"] = obj.find("pose").text + obj_struct["truncated"] = int(obj.find("truncated").text) + obj_struct["difficult"] = int(obj.find("difficult").text) + bbox = obj.find("bndbox") + obj_struct["bbox"] = [ + int(bbox.find("xmin").text), + int(bbox.find("ymin").text), + int(bbox.find("xmax").text), + int(bbox.find("ymax").text), + ] + objects.append(obj_struct) + + return objects + + +def voc_ap(rec, prec, use_07_metric=False): + """Compute VOC AP given precision and recall. If use_07_metric is true, uses + the VOC 07 11-point method (default:False). + """ + if use_07_metric: + # 11 point metric + ap = 0.0 + for t in np.arange(0.0, 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11.0 + else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], rec, [1.0])) + mpre = np.concatenate(([0.0], prec, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): + """rec, prec, ap = voc_eval(detpath, + annopath, + imagesetfile, + classname, + [ovthresh], + [use_07_metric]) + + Top level function that does the PASCAL VOC evaluation. + + detpath: Path to detections + detpath.format(classname) should produce the detection results file. + annopath: Path to annotations + annopath.format(imagename) should be the xml annotations file. + imagesetfile: Text file containing the list of images, one image per line. 
+ classname: Category name (duh) + [ovthresh]: Overlap threshold (default = 0.5) + [use_07_metric]: Whether to use VOC07's 11 point AP computation + (default False) + """ + # assumes detections are in detpath.format(classname) + # assumes annotations are in annopath.format(imagename) + # assumes imagesetfile is a text file with each line an image name + + # first load gt + # read list of images + with PathManager.open(imagesetfile, "r") as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + + # load annots + recs = {} + for imagename in imagenames: + recs[imagename] = parse_rec(annopath.format(imagename)) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj["name"] == classname] + bbox = np.array([x["bbox"] for x in R]) + difficult = np.array([x["difficult"] for x in R]).astype(np.bool) + # difficult = np.array([False for x in R]).astype(np.bool) # treat all "difficult" as GT + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det} + + # read dets + detfile = detpath.format(classname) + with open(detfile, "r") as f: + lines = f.readlines() + + splitlines = [x.strip().split(" ") for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R["bbox"].astype(float) + + if BBGT.size > 0: + # compute overlaps + # intersection + ixmin = np.maximum(BBGT[:, 0], bb[0]) + iymin = np.maximum(BBGT[:, 1], bb[1]) + ixmax = np.minimum(BBGT[:, 2], bb[2]) + iymax = np.minimum(BBGT[:, 3], bb[3]) + iw = np.maximum(ixmax - ixmin + 1.0, 0.0) + ih = np.maximum(iymax - iymin + 1.0, 0.0) + inters = iw * ih + + # union + uni = ( + (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0) + + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) + - inters + ) + + overlaps = inters / uni + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + + if ovmax > ovthresh: + if not R["difficult"][jmax]: + if not R["det"][jmax]: + tp[d] = 1.0 + R["det"][jmax] = 1 + else: + fp[d] = 1.0 + else: + fp[d] = 1.0 + + # compute precision recall + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec, use_07_metric) + + return rec, prec, ap diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rotated_coco_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rotated_coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..ea6d1b381dcf106339a03f08577df673ad439c46 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rotated_coco_evaluation.py @@ -0,0 +1,207 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
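+"""
+COCO-style AP evaluation for rotated boxes (XYWHA_ABS): IoU between detections and ground
+truth is computed with pairwise_iou_rotated rather than pycocotools' maskUtils.iou.
+
+Minimal usage sketch (illustrative only; the dataset name is a placeholder for a dataset
+registered with rotated-box annotations, and ``model`` / ``val_loader`` come from elsewhere)::
+
+    from detectron2.evaluation import RotatedCOCOEvaluator, inference_on_dataset
+
+    evaluator = RotatedCOCOEvaluator("my_rotated_val", output_dir="./output")
+    results = inference_on_dataset(model, val_loader, evaluator)
+    print(results["bbox"]["AP"])
+"""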
+import itertools +import json +import numpy as np +import os +import torch +from pycocotools.cocoeval import COCOeval, maskUtils + +from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.file_io import PathManager + +from .coco_evaluation import COCOEvaluator + + +class RotatedCOCOeval(COCOeval): + @staticmethod + def is_rotated(box_list): + if type(box_list) == np.ndarray: + return box_list.shape[1] == 5 + elif type(box_list) == list: + if box_list == []: # cannot decide the box_dim + return False + return np.all( + np.array( + [ + (len(obj) == 5) and ((type(obj) == list) or (type(obj) == np.ndarray)) + for obj in box_list + ] + ) + ) + return False + + @staticmethod + def boxlist_to_tensor(boxlist, output_box_dim): + if type(boxlist) == np.ndarray: + box_tensor = torch.from_numpy(boxlist) + elif type(boxlist) == list: + if boxlist == []: + return torch.zeros((0, output_box_dim), dtype=torch.float32) + else: + box_tensor = torch.FloatTensor(boxlist) + else: + raise Exception("Unrecognized boxlist type") + + input_box_dim = box_tensor.shape[1] + if input_box_dim != output_box_dim: + if input_box_dim == 4 and output_box_dim == 5: + box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) + else: + raise Exception( + "Unable to convert from {}-dim box to {}-dim box".format( + input_box_dim, output_box_dim + ) + ) + return box_tensor + + def compute_iou_dt_gt(self, dt, gt, is_crowd): + if self.is_rotated(dt) or self.is_rotated(gt): + # TODO: take is_crowd into consideration + assert all(c == 0 for c in is_crowd) + dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5)) + gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5)) + return pairwise_iou_rotated(dt, gt) + else: + # This is the same as the classical COCO evaluation + return maskUtils.iou(dt, gt, is_crowd) + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId, catId] + dt = self._dts[imgId, catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] + if len(gt) == 0 and len(dt) == 0: + return [] + inds = np.argsort([-d["score"] for d in dt], kind="mergesort") + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt = dt[0 : p.maxDets[-1]] + + assert p.iouType == "bbox", "unsupported iouType for iou computation" + + g = [g["bbox"] for g in gt] + d = [d["bbox"] for d in dt] + + # compute iou between each dt and gt region + iscrowd = [int(o["iscrowd"]) for o in gt] + + # Note: this function is copied from cocoeval.py in cocoapi + # and the major difference is here. + ious = self.compute_iou_dt_gt(d, g, iscrowd) + return ious + + +class RotatedCOCOEvaluator(COCOEvaluator): + """ + Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs, + with rotated boxes support. + Note: this uses IOU only and does not consider angle differences. + """ + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. 
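The box handling in `boxlist_to_tensor` / `compute_iou_dt_gt` above can be exercised directly. The sketch below is a minimal illustration and assumes a detectron2 installation whose compiled C++/CUDA ops are available, since `pairwise_iou_rotated` is backed by the `_C` extension.

```python
import torch
from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated

# A 4-dim XYWH box is promoted to a 5-dim XYWHA box (angle 0), as in boxlist_to_tensor.
xywh = torch.tensor([[10.0, 20.0, 30.0, 40.0]])                     # x, y, w, h
xywha = BoxMode.convert(xywh, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)  # -> [[25, 40, 30, 40, 0]]

dt = RotatedBoxes(xywha)
gt = RotatedBoxes(torch.tensor([[25.0, 40.0, 30.0, 40.0, 15.0]]))   # same box, rotated 15 degrees

print(pairwise_iou_rotated(dt, gt))  # 1x1 IoU matrix, strictly below 1.0 once the angles differ
```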
+ """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + + prediction["instances"] = self.instances_to_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def instances_to_json(self, instances, img_id): + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + if boxes.shape[1] == 4: + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + + results.append(result) + return results + + def _eval_predictions(self, predictions, img_ids=None): # img_ids: unused + """ + Evaluate predictions on the given tasks. + Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in coco_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + + assert self._tasks is None or set(self._tasks) == { + "bbox" + }, "[RotatedCOCOEvaluator] Only bbox evaluation is supported" + coco_eval = ( + self._evaluate_predictions_on_coco(self._coco_api, coco_results) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + task = "bbox" + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _evaluate_predictions_on_coco(self, coco_gt, coco_results): + """ + Evaluate the coco results using COCOEval API. 
+ """ + assert len(coco_results) > 0 + + coco_dt = coco_gt.loadRes(coco_results) + + # Only bbox is supported for now + coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox") + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rrc_evaluation_funcs.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rrc_evaluation_funcs.py new file mode 100644 index 0000000000000000000000000000000000000000..0ed93d79af857c11a92efbb7d3cd058d519ac124 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rrc_evaluation_funcs.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python2 +#encoding: UTF-8 +import json +import sys;sys.path.append('./') +import zipfile +import re +import sys +import os +import codecs +import importlib +from io import StringIO + +from shapely.geometry import * + +def print_help(): + sys.stdout.write('Usage: python %s.py -g= -s= [-o= -p=]' %sys.argv[0]) + sys.exit(2) + + +def load_zip_file_keys(file,fileNameRegExp=''): + """ + Returns an array with the entries of the ZIP file that match with the regular expression. + The key's are the names or the file or the capturing group definied in the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive.') + + pairs = [] + + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( keyName ) + + return pairs + + +def load_zip_file(file,fileNameRegExp='',allEntries=False): + """ + Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file. 
+ The key's are the names or the file or the capturing group definied in the fileNameRegExp + allEntries validates that all entries in the ZIP file pass the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive') + + pairs = [] + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( [ keyName , archive.read(name)] ) + else: + if allEntries: + raise Exception('ZIP entry not valid: %s' %name) + + return dict(pairs) + +def decode_utf8(raw): + """ + Returns a Unicode object on success, or None on failure + """ + try: + raw = codecs.decode(raw,'utf-8', 'replace') + #extracts BOM if exists + raw = raw.encode('utf8') + if raw.startswith(codecs.BOM_UTF8): + raw = raw.replace(codecs.BOM_UTF8, '', 1) + return raw.decode('utf-8') + except: + return None + +def validate_lines_in_file_gt(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + This function validates that all lines of the file calling the Line validation function for each line + """ + utf8File = decode_utf8(file_contents) + if (utf8File is None) : + raise Exception("The file %s is not UTF-8" %fileName) + + lines = utf8File.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != ""): + try: + validate_tl_line_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + except Exception as e: + raise Exception(("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))).encode('utf-8', 'replace')) + +def validate_lines_in_file(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + This function validates that all lines of the file calling the Line validation function for each line + """ + utf8File = decode_utf8(file_contents) + if (utf8File is None) : + raise Exception("The file %s is not UTF-8" %fileName) + + lines = utf8File.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != ""): + try: + validate_tl_line(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + except Exception as e: + raise Exception(("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))).encode('utf-8', 'replace')) + +def validate_tl_line_gt(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + """ + get_tl_line_values_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + +def validate_tl_line(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. 
+ Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + """ + get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + +def get_tl_line_values_gt(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + Returns values from a textline. Points , [Confidences], [Transcriptions] + """ + confidence = 0.0 + transcription = ""; + points = [] + + if LTRB: + # do not use + raise Exception('Not implemented.') + + else: + # if withTranscription and withConfidence: + # cors = line.split(',') + # assert(len(cors)%2 -2 == 0), 'num cors should be even.' + # try: + # points = [ float(ic) for ic in cors[:-2]] + # except Exception as e: + # raise(e) + # elif withConfidence: + # cors = line.split(',') + # assert(len(cors)%2 -1 == 0), 'num cors should be even.' + # try: + # points = [ float(ic) for ic in cors[:-1]] + # except Exception as e: + # raise(e) + # elif withTranscription: + # cors = line.split(',') + # assert(len(cors)%2 -1 == 0), 'num cors should be even.' + # try: + # points = [ float(ic) for ic in cors[:-1]] + # except Exception as e: + # raise(e) + # else: + # cors = line.split(',') + # assert(len(cors)%2 == 0), 'num cors should be even.' + # try: + # points = [ float(ic) for ic in cors[:]] + # except Exception as e: + # raise(e) + + if withTranscription and withConfidence: + raise('not implemented') + elif withConfidence: + raise('not implemented') + elif withTranscription: + ptr = line.strip().split(',####') + cors = ptr[0].split(',') + recs = ptr[1].strip() + assert(len(cors)%2 == 0), 'num cors should be even.' + try: + points = [ float(ic) for ic in cors[:]] + except Exception as e: + raise(e) + else: + raise('not implemented') + + validate_clockwise_points(points) + + if (imWidth>0 and imHeight>0): + for ip in range(0, len(points), 2): + validate_point_inside_bounds(points[ip],points[ip+1],imWidth,imHeight); + + + if withConfidence: + try: + confidence = 1.0 + except ValueError: + raise Exception("Confidence value must be a float") + + if withTranscription: + # posTranscription = numPoints + (2 if withConfidence else 1) + # transcription = cors[-1].strip() + transcription = recs + m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription) + if m2 != None : #Transcription with double quotes, we extract the value and replace escaped characters + transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"") + + return points,confidence,transcription + +def get_tl_line_values(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + Returns values from a textline. 
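Stripped of its validation, the `withTranscription` branch of `get_tl_line_values_gt` above boils down to splitting a ground-truth line on the `,####` separator. A minimal standalone illustration with an invented line (a transcription of `###` would mark the region as don't-care):

```python
line = "0,0,0,10,10,10,10,0,####Coffee"

coords, transcription = line.strip().split(",####")
points = [float(v) for v in coords.split(",")]
assert len(points) % 2 == 0, "num cors should be even."

print(points)         # [0.0, 0.0, 0.0, 10.0, 10.0, 10.0, 10.0, 0.0]
print(transcription)  # 'Coffee'
```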
Points , [Confidences], [Transcriptions] + """ + confidence = 0.0 + transcription = ""; + points = [] + + if LTRB: + # do not use + raise Exception('Not implemented.') + + else: + if withTranscription and withConfidence: + raise('not implemented') + elif withConfidence: + raise('not implemented') + elif withTranscription: + ptr = line.strip().split(',####') + cors = ptr[0].split(',') + recs = ptr[1].strip() + assert(len(cors)%2 == 0), 'num cors should be even.' + try: + points = [ float(ic) for ic in cors[:]] + except Exception as e: + raise(e) + else: + raise('not implemented') + + # print('det clock wise') + validate_clockwise_points(points) + + if (imWidth>0 and imHeight>0): + for ip in range(0, len(points), 2): + validate_point_inside_bounds(points[ip],points[ip+1],imWidth,imHeight); + + + if withConfidence: + try: + confidence = 1.0 + except ValueError: + raise Exception("Confidence value must be a float") + + if withTranscription: + # posTranscription = numPoints + (2 if withConfidence else 1) + transcription = recs + m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription) + if m2 != None : #Transcription with double quotes, we extract the value and replace escaped characters + transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"") + + return points,confidence,transcription + + +def validate_point_inside_bounds(x,y,imWidth,imHeight): + if(x<0 or x>imWidth): + raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %(xmin,imWidth,imHeight)) + if(y<0 or y>imHeight): + raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s) Sample: %s Line:%s" %(ymin,imWidth,imHeight)) + +def validate_clockwise_points(points): + """ + Validates that the points that the 4 points that dlimite a polygon are in clockwise order. + """ + + # if len(points) != 8: + # raise Exception("Points list not valid." + str(len(points))) + + # point = [ + # [int(points[0]) , int(points[1])], + # [int(points[2]) , int(points[3])], + # [int(points[4]) , int(points[5])], + # [int(points[6]) , int(points[7])] + # ] + # edge = [ + # ( point[1][0] - point[0][0])*( point[1][1] + point[0][1]), + # ( point[2][0] - point[1][0])*( point[2][1] + point[1][1]), + # ( point[3][0] - point[2][0])*( point[3][1] + point[2][1]), + # ( point[0][0] - point[3][0])*( point[0][1] + point[3][1]) + # ] + + # summatory = edge[0] + edge[1] + edge[2] + edge[3]; + # if summatory>0: + # raise Exception("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.") + pts = [(points[j], points[j+1]) for j in range(0,len(points),2)] + try: + pdet = Polygon(pts) + except: + assert(0), ('not a valid polygon', pts) + # The polygon should be valid. + if not pdet.is_valid: + assert(0), ('polygon has intersection sides', pts) + pRing = LinearRing(pts) + if pRing.is_ccw: + assert(0), ("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. 
Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.") + +def get_tl_line_values_from_file_contents(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True): + """ + Returns all points, confindences and transcriptions of a file in lists. Valid line formats: + xmin,ymin,xmax,ymax,[confidence],[transcription] + x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription] + """ + pointsList = [] + transcriptionsList = [] + confidencesList = [] + + lines = content.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != "") : + points, confidence, transcription = get_tl_line_values_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight); + pointsList.append(points) + transcriptionsList.append(transcription) + confidencesList.append(confidence) + + if withConfidence and len(confidencesList)>0 and sort_by_confidences: + import numpy as np + sorted_ind = np.argsort(-np.array(confidencesList)) + confidencesList = [confidencesList[i] for i in sorted_ind] + pointsList = [pointsList[i] for i in sorted_ind] + transcriptionsList = [transcriptionsList[i] for i in sorted_ind] + + return pointsList,confidencesList,transcriptionsList + +def get_tl_line_values_from_file_contents_det(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True): + """ + Returns all points, confindences and transcriptions of a file in lists. Valid line formats: + xmin,ymin,xmax,ymax,[confidence],[transcription] + x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription] + """ + pointsList = [] + transcriptionsList = [] + confidencesList = [] + + lines = content.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != "") : + points, confidence, transcription = get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight); + pointsList.append(points) + transcriptionsList.append(transcription) + confidencesList.append(confidence) + + if withConfidence and len(confidencesList)>0 and sort_by_confidences: + import numpy as np + sorted_ind = np.argsort(-np.array(confidencesList)) + confidencesList = [confidencesList[i] for i in sorted_ind] + pointsList = [pointsList[i] for i in sorted_ind] + transcriptionsList = [transcriptionsList[i] for i in sorted_ind] + + return pointsList,confidencesList,transcriptionsList + +def main_evaluation(p,det_file, gt_file, default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True): + """ + This process validates a method, evaluates it and if it succed generates a ZIP file with a JSON entry for each sample. + Params: + p: Dictionary of parmeters with the GT/submission locations. If None is passed, the parameters send by the system are used. 
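`validate_clockwise_points` above delegates the winding test to Shapely: any ring for which `LinearRing.is_ccw` is `True` trips the assertion. A small standalone check of the two windings of the same square (coordinates invented); note that the IC15 variant later in this patch performs an explicit signed-area test instead of using Shapely.

```python
from shapely.geometry import LinearRing

accepted = [(0, 0), (0, 10), (10, 10), (10, 0)]   # is_ccw == False -> passes the check
rejected = [(0, 0), (10, 0), (10, 10), (0, 10)]   # is_ccw == True  -> trips the assertion

print(LinearRing(accepted).is_ccw)  # False
print(LinearRing(rejected).is_ccw)  # True
```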
+ default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + evaluate_method_fn: points to a function that evaluated the submission and return a Dictionary with the results + """ + + # if (p == None): + # p = dict([s[1:].split('=') for s in sys.argv[1:]]) + # if(len(sys.argv)<3): + # print_help() + p = {} + p['g'] =gt_file #'tttgt.zip' + p['s'] =det_file #'det.zip' + + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'} + # try: + validate_data_fn(p['g'], p['s'], evalParams) + evalData = evaluate_method_fn(p['g'], p['s'], evalParams) + resDict.update(evalData) + + # except Exception as e: + # resDict['Message']= str(e) + # resDict['calculated']=False + + if 'o' in p: + if not os.path.exists(p['o']): + os.makedirs(p['o']) + + resultsOutputname = p['o'] + '/results.zip' + outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True) + + del resDict['per_sample'] + if 'output_items' in resDict.keys(): + del resDict['output_items'] + + outZip.writestr('method.json',json.dumps(resDict)) + + if not resDict['calculated']: + if show_result: + sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n') + if 'o' in p: + outZip.close() + return resDict + + if 'o' in p: + if per_sample == True: + for k,v in evalData['per_sample'].items(): + outZip.writestr( k + '.json',json.dumps(v)) + + if 'output_items' in evalData.keys(): + for k, v in evalData['output_items'].items(): + outZip.writestr( k,v) + + outZip.close() + + if show_result: + sys.stdout.write("Calculated!") + sys.stdout.write('\n') + sys.stdout.write(json.dumps(resDict['e2e_method'])) + sys.stdout.write('\n') + sys.stdout.write(json.dumps(resDict['det_only_method'])) + sys.stdout.write('\n') + + return resDict + + +def main_validation(default_evaluation_params_fn,validate_data_fn): + """ + This process validates a method + Params: + default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + """ + try: + p = dict([s[1:].split('=') for s in sys.argv[1:]]) + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + validate_data_fn(p['g'], p['s'], evalParams) + print('SUCCESS') + sys.exit(0) + except Exception as e: + print(str(e)) + sys.exit(101) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rrc_evaluation_funcs_ic15.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rrc_evaluation_funcs_ic15.py new file mode 100644 index 0000000000000000000000000000000000000000..ba6c70f3605eaaaffad624db0e1099fcfada8179 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/rrc_evaluation_funcs_ic15.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python2 +#encoding: UTF-8 +import json +import sys;sys.path.append('./') +import zipfile +import re +import sys +import os +import codecs +import importlib +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +def print_help(): + sys.stdout.write('Usage: python %s.py -g= -s= [-o= -p=]' %sys.argv[0]) + sys.exit(2) + + +def 
load_zip_file_keys(file,fileNameRegExp=''): + """ + Returns an array with the entries of the ZIP file that match with the regular expression. + The key's are the names or the file or the capturing group definied in the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive.') + + pairs = [] + + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( keyName ) + + return pairs + + +def load_zip_file(file,fileNameRegExp='',allEntries=False): + """ + Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file. + The key's are the names or the file or the capturing group definied in the fileNameRegExp + allEntries validates that all entries in the ZIP file pass the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive') + + pairs = [] + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( [ keyName , archive.read(name)] ) + else: + if allEntries: + raise Exception('ZIP entry not valid: %s' %name) + + return dict(pairs) + +def decode_utf8(raw): + """ + Returns a Unicode object on success, or None on failure + """ + try: + raw = codecs.decode(raw,'utf-8', 'replace') + #extracts BOM if exists + raw = raw.encode('utf8') + if raw.startswith(codecs.BOM_UTF8): + raw = raw.replace(codecs.BOM_UTF8, '', 1) + return raw.decode('utf-8') + except: + return None + +def validate_lines_in_file(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + This function validates that all lines of the file calling the Line validation function for each line + """ + utf8File = decode_utf8(file_contents) + if (utf8File is None) : + raise Exception("The file %s is not UTF-8" %fileName) + + lines = utf8File.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != ""): + try: + validate_tl_line(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + except Exception as e: + raise Exception(("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))).encode('utf-8', 'replace')) + + + +def validate_tl_line(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + """ + get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + + +def get_tl_line_values(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. 
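`decode_utf8` above mainly exists to tolerate UTF-8 files that begin with a byte-order mark; the essential behaviour fits in a few lines (the payload is invented, and the real helper additionally returns `None` on any decode error):

```python
import codecs

raw = codecs.BOM_UTF8 + "gt_img_1.txt contents".encode("utf-8")

# Mirror decode_utf8: strip a leading BOM, then decode as UTF-8.
if raw.startswith(codecs.BOM_UTF8):
    raw = raw.replace(codecs.BOM_UTF8, b"", 1)
print(raw.decode("utf-8"))  # 'gt_img_1.txt contents'
```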
+ Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + Returns values from a textline. Points , [Confidences], [Transcriptions] + """ + confidence = 0.0 + transcription = ""; + points = [] + + numPoints = 4; + + if LTRB: + + numPoints = 4; + + if withTranscription and withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line) + if m == None : + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line) + raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence,transcription") + elif withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',line) + if m == None : + raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence") + elif withTranscription: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,(.*)$',line) + if m == None : + raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,transcription") + else: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,?\s*$',line) + if m == None : + raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax") + + xmin = int(m.group(1)) + ymin = int(m.group(2)) + xmax = int(m.group(3)) + ymax = int(m.group(4)) + if(xmax0 and imHeight>0): + validate_point_inside_bounds(xmin,ymin,imWidth,imHeight); + validate_point_inside_bounds(xmax,ymax,imWidth,imHeight); + + else: + numPoints = 8; + + if withTranscription and withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line) + if m == None : + raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence,transcription") + elif withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',line) + if m == None : + raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence") + elif withTranscription: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,(.*)$',line) + if m == None : + raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,transcription") + else: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*$',line) + if m == None : + raise Exception("Format incorrect. 
Should be: x1,y1,x2,y2,x3,y3,x4,y4") + + points = [ float(m.group(i)) for i in range(1, (numPoints+1) ) ] + validate_clockwise_points(points) + + if (imWidth>0 and imHeight>0): + validate_point_inside_bounds(points[0],points[1],imWidth,imHeight); + validate_point_inside_bounds(points[2],points[3],imWidth,imHeight); + validate_point_inside_bounds(points[4],points[5],imWidth,imHeight); + validate_point_inside_bounds(points[6],points[7],imWidth,imHeight); + + + if withConfidence: + try: + confidence = float(m.group(numPoints+1)) + except ValueError: + raise Exception("Confidence value must be a float") + + if withTranscription: + posTranscription = numPoints + (2 if withConfidence else 1) + transcription = m.group(posTranscription) + m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription) + if m2 != None : #Transcription with double quotes, we extract the value and replace escaped characters + transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"") + + return points,confidence,transcription + + +def validate_point_inside_bounds(x,y,imWidth,imHeight): + if(x<0 or x>imWidth): + raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %(xmin,imWidth,imHeight)) + if(y<0 or y>imHeight): + raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s) Sample: %s Line:%s" %(ymin,imWidth,imHeight)) + +def validate_clockwise_points(points): + """ + Validates that the points that the 4 points that dlimite a polygon are in clockwise order. + """ + + if len(points) != 8: + raise Exception("Points list not valid." + str(len(points))) + + point = [ + [int(points[0]) , int(points[1])], + [int(points[2]) , int(points[3])], + [int(points[4]) , int(points[5])], + [int(points[6]) , int(points[7])] + ] + edge = [ + ( point[1][0] - point[0][0])*( point[1][1] + point[0][1]), + ( point[2][0] - point[1][0])*( point[2][1] + point[1][1]), + ( point[3][0] - point[2][0])*( point[3][1] + point[2][1]), + ( point[0][0] - point[3][0])*( point[0][1] + point[3][1]) + ] + + summatory = edge[0] + edge[1] + edge[2] + edge[3]; + if summatory>0: + raise Exception("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.") + +def get_tl_line_values_from_file_contents(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True): + """ + Returns all points, confindences and transcriptions of a file in lists. 
Valid line formats: + xmin,ymin,xmax,ymax,[confidence],[transcription] + x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription] + """ + pointsList = [] + transcriptionsList = [] + confidencesList = [] + + lines = content.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != "") : + points, confidence, transcription = get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight); + pointsList.append(points) + transcriptionsList.append(transcription) + confidencesList.append(confidence) + + if withConfidence and len(confidencesList)>0 and sort_by_confidences: + import numpy as np + sorted_ind = np.argsort(-np.array(confidencesList)) + confidencesList = [confidencesList[i] for i in sorted_ind] + pointsList = [pointsList[i] for i in sorted_ind] + transcriptionsList = [transcriptionsList[i] for i in sorted_ind] + + return pointsList,confidencesList,transcriptionsList + +def main_evaluation(p,default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True): + """ + This process validates a method, evaluates it and if it succed generates a ZIP file with a JSON entry for each sample. + Params: + p: Dictionary of parmeters with the GT/submission locations. If None is passed, the parameters send by the system are used. + default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + evaluate_method_fn: points to a function that evaluated the submission and return a Dictionary with the results + """ + + if (p == None): + p = dict([s[1:].split('=') for s in sys.argv[1:]]) + if(len(sys.argv)<3): + print_help() + + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'} + try: + validate_data_fn(p['g'], p['s'], evalParams) + evalData = evaluate_method_fn(p['g'], p['s'], evalParams) + resDict.update(evalData) + + except Exception as e: + resDict['Message']= str(e) + resDict['calculated']=False + + if 'o' in p: + if not os.path.exists(p['o']): + os.makedirs(p['o']) + + resultsOutputname = p['o'] + '/results.zip' + outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True) + + del resDict['per_sample'] + if 'output_items' in resDict.keys(): + del resDict['output_items'] + + outZip.writestr('method.json',json.dumps(resDict)) + + if not resDict['calculated']: + if show_result: + sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n') + if 'o' in p: + outZip.close() + return resDict + + if 'o' in p: + if per_sample == True: + for k,v in evalData['per_sample'].items(): + outZip.writestr( k + '.json',json.dumps(v)) + + if 'output_items' in evalData.keys(): + for k, v in evalData['output_items'].items(): + outZip.writestr( k,v) + + outZip.close() + + if show_result: + sys.stdout.write("Calculated!") + sys.stdout.write(json.dumps(resDict['method'])) + + return resDict + + +def main_validation(default_evaluation_params_fn,validate_data_fn): + """ + This process validates a method + Params: + default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + """ + try: + p = dict([s[1:].split('=') for s in 
sys.argv[1:]]) + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + validate_data_fn(p['g'], p['s'], evalParams) + print('SUCCESS') + sys.exit(0) + except Exception as e: + print(str(e)) + sys.exit(101) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/sem_seg_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/sem_seg_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..7a19db71562ef47569dc7f77ec616af85447f0ec --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/sem_seg_evaluation.py @@ -0,0 +1,184 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import json +import logging +import numpy as np +import os +from collections import OrderedDict +import PIL.Image as Image +import pycocotools.mask as mask_util +import torch + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.comm import all_gather, is_main_process, synchronize +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class SemSegEvaluator(DatasetEvaluator): + """ + Evaluate semantic segmentation metrics. + """ + + def __init__( + self, + dataset_name, + distributed=True, + output_dir=None, + *, + num_classes=None, + ignore_label=None, + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + distributed (bool): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + output_dir (str): an output directory to dump results. + num_classes, ignore_label: deprecated argument + """ + self._logger = logging.getLogger(__name__) + if num_classes is not None: + self._logger.warn( + "SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata." + ) + if ignore_label is not None: + self._logger.warn( + "SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata." + ) + self._dataset_name = dataset_name + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + + self.input_file_to_gt_file = { + dataset_record["file_name"]: dataset_record["sem_seg_file_name"] + for dataset_record in DatasetCatalog.get(dataset_name) + } + + meta = MetadataCatalog.get(dataset_name) + # Dict that maps contiguous training ids to COCO category ids + try: + c2d = meta.stuff_dataset_id_to_contiguous_id + self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} + except AttributeError: + self._contiguous_id_to_dataset_id = None + self._class_names = meta.stuff_classes + self._num_classes = len(meta.stuff_classes) + if num_classes is not None: + assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}" + self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label + + def reset(self): + self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64) + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a model. + It is a list of dicts. Each dict corresponds to an image and + contains keys like "height", "width", "file_name". + outputs: the outputs of a model. It is either list of semantic segmentation predictions + (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic + segmentation prediction in the same format. 
+ """ + for input, output in zip(inputs, outputs): + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) + pred = np.array(output, dtype=np.int) + with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f: + gt = np.array(Image.open(f), dtype=np.int) + + gt[gt == self._ignore_label] = self._num_classes + + self._conf_matrix += np.bincount( + (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1), + minlength=self._conf_matrix.size, + ).reshape(self._conf_matrix.shape) + + self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) + + def evaluate(self): + """ + Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): + + * Mean intersection-over-union averaged across classes (mIoU) + * Frequency Weighted IoU (fwIoU) + * Mean pixel accuracy averaged across classes (mACC) + * Pixel Accuracy (pACC) + """ + if self._distributed: + synchronize() + conf_matrix_list = all_gather(self._conf_matrix) + self._predictions = all_gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not is_main_process(): + return + + self._conf_matrix = np.zeros_like(self._conf_matrix) + for conf_matrix in conf_matrix_list: + self._conf_matrix += conf_matrix + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(self._predictions)) + + acc = np.full(self._num_classes, np.nan, dtype=np.float) + iou = np.full(self._num_classes, np.nan, dtype=np.float) + tp = self._conf_matrix.diagonal()[:-1].astype(np.float) + pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) + class_weights = pos_gt / np.sum(pos_gt) + pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) + acc_valid = pos_gt > 0 + acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] + iou_valid = (pos_gt + pos_pred) > 0 + union = pos_gt + pos_pred - tp + iou[acc_valid] = tp[acc_valid] / union[acc_valid] + macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) + miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) + fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) + pacc = np.sum(tp) / np.sum(pos_gt) + + res = {} + res["mIoU"] = 100 * miou + res["fwIoU"] = 100 * fiou + for i, name in enumerate(self._class_names): + res["IoU-{}".format(name)] = 100 * iou[i] + res["mACC"] = 100 * macc + res["pACC"] = 100 * pacc + for i, name in enumerate(self._class_names): + res["ACC-{}".format(name)] = 100 * acc[i] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(res, f) + results = OrderedDict({"sem_seg": res}) + self._logger.info(results) + return results + + def encode_json_sem_seg(self, sem_seg, input_file_name): + """ + Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. 
+ See http://cocodataset.org/#format-results + """ + json_list = [] + for label in np.unique(sem_seg): + if self._contiguous_id_to_dataset_id is not None: + assert ( + label in self._contiguous_id_to_dataset_id + ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) + dataset_id = self._contiguous_id_to_dataset_id[label] + else: + dataset_id = int(label) + mask = (sem_seg == label).astype(np.uint8) + mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] + mask_rle["counts"] = mask_rle["counts"].decode("utf-8") + json_list.append( + {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} + ) + return json_list diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/testing.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..04517f51548c085e6a9f56c943a17421ef07a388 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/testing.py @@ -0,0 +1,83 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +import pprint +import sys +from collections import OrderedDict +from collections.abc import Mapping + + +def print_csv_format(results): + """ + Print main metrics in a format similar to Detectron, + so that they are easy to copypaste into a spreadsheet. + + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + """ + # unordered results cannot be properly printed + assert isinstance(results, OrderedDict) or not len(results), results + logger = logging.getLogger(__name__) + for task, res in results.items(): + # Don't print "AP-category" metrics since they are usually not tracked. + important_res = [(k, v) for k, v in res.items() if "-" not in k] + logger.info("copypaste: Task: {}".format(task)) + logger.info("copypaste: " + ",".join([k[0] for k in important_res])) + logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) + + +def verify_results(cfg, results): + """ + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + + Returns: + bool: whether the verification succeeds or not + """ + expected_results = cfg.TEST.EXPECTED_RESULTS + if not len(expected_results): + return True + + ok = True + for task, metric, expected, tolerance in expected_results: + actual = results[task].get(metric, None) + if actual is None: + ok = False + continue + if not np.isfinite(actual): + ok = False + continue + diff = abs(actual - expected) + if diff > tolerance: + ok = False + + logger = logging.getLogger(__name__) + if not ok: + logger.error("Result verification failed!") + logger.error("Expected Results: " + str(expected_results)) + logger.error("Actual Results: " + pprint.pformat(results)) + + sys.exit(1) + else: + logger.info("Results verification passed.") + return ok + + +def flatten_results_dict(results): + """ + Expand a hierarchical dict of scalars into a flat dict of scalars. + If results[k1][k2][k3] = v, the returned dict will have the entry + {"k1/k2/k3": v}. 
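`flatten_results_dict` is the glue between the nested result dicts the evaluators return and flat scalar loggers. A minimal usage sketch with made-up scores, assuming the vendored detectron2 package is importable:

```python
from detectron2.evaluation.testing import flatten_results_dict

results = {"bbox": {"AP": 41.2, "AP50": 62.8}, "sem_seg": {"mIoU": 55.0}}

print(flatten_results_dict(results))
# {'bbox/AP': 41.2, 'bbox/AP50': 62.8, 'sem_seg/mIoU': 55.0}
```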
+ + Args: + results (dict): + """ + r = {} + for k, v in results.items(): + if isinstance(v, Mapping): + v = flatten_results_dict(v) + for kk, vv in v.items(): + r[k + "/" + kk] = vv + else: + r[k] = v + return r diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_eval_script.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_eval_script.py new file mode 100644 index 0000000000000000000000000000000000000000..37758770740bb3ddcad6520f6d5887669f9d8183 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_eval_script.py @@ -0,0 +1,472 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# encoding=utf8 +from collections import namedtuple +from detectron2.evaluation import rrc_evaluation_funcs +import importlib +import sys + +import math + +from rapidfuzz import string_metric + +WORD_SPOTTING =True +def evaluation_imports(): + """ + evaluation_imports: Dictionary ( key = module name , value = alias ) with python modules used in the evaluation. + """ + return { + 'Polygon':'plg', + 'numpy':'np' + } + +def default_evaluation_params(): + """ + default_evaluation_params: Default parameters to use for the validation and evaluation. + """ + global WORD_SPOTTING + return { + 'IOU_CONSTRAINT' :0.5, + 'AREA_PRECISION_CONSTRAINT' :0.5, + 'WORD_SPOTTING' :WORD_SPOTTING, + 'MIN_LENGTH_CARE_WORD' :3, + 'GT_SAMPLE_NAME_2_ID':'([0-9]+).txt', + 'DET_SAMPLE_NAME_2_ID':'([0-9]+).txt', + 'LTRB':False, #LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) + 'CRLF':False, # Lines are delimited by Windows CRLF format + 'CONFIDENCES':False, #Detections must include confidence value. MAP and MAR will be calculated, + 'SPECIAL_CHARACTERS':str('!?.:,*"()·[]/\''), + 'ONLY_REMOVE_FIRST_LAST_CHARACTER' : True + } + +def validate_data(gtFilePath, submFilePath, evaluationParams): + """ + Method validate_data: validates that all files in the results folder are correct (have the correct name contents). + Validates also that there are no missing files in the folder. + If some error detected, the method raises the error + """ + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) + + subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) + + #Validate format of GroundTruth + for k in gt: + rrc_evaluation_funcs.validate_lines_in_file_gt(k,gt[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True) + + #Validate format of results + for k in subm: + if (k in gt) == False : + raise Exception("The sample %s not present in GT" %k) + + rrc_evaluation_funcs.validate_lines_in_file(k,subm[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + +def evaluate_method(gtFilePath, submFilePath, evaluationParams): + """ + Method evaluate_method: evaluate method and returns the results + Results. Dictionary with the following values: + - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } + - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } + """ + for module,alias in evaluation_imports().items(): + globals()[alias] = importlib.import_module(module) + + def polygon_from_points(points): + """ + Returns a Polygon object to use with the Polygon2 class from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4 + """ + num_points = len(points) + # resBoxes=np.empty([1,num_points],dtype='int32') + resBoxes=np.empty([1,num_points],dtype='float32') + for inp in range(0, num_points, 2): + resBoxes[0, int(inp/2)] = float(points[int(inp)]) + resBoxes[0, int(inp/2+num_points/2)] = float(points[int(inp+1)]) + pointMat = resBoxes[0].reshape([2,int(num_points/2)]).T + return plg.Polygon(pointMat) + + def rectangle_to_polygon(rect): + resBoxes=np.empty([1,8],dtype='int32') + resBoxes[0,0]=int(rect.xmin) + resBoxes[0,4]=int(rect.ymax) + resBoxes[0,1]=int(rect.xmin) + resBoxes[0,5]=int(rect.ymin) + resBoxes[0,2]=int(rect.xmax) + resBoxes[0,6]=int(rect.ymin) + resBoxes[0,3]=int(rect.xmax) + resBoxes[0,7]=int(rect.ymax) + + pointMat = resBoxes[0].reshape([2,4]).T + + return plg.Polygon( pointMat) + + def rectangle_to_points(rect): + points = [int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax), int(rect.xmax), int(rect.ymin), int(rect.xmin), int(rect.ymin)] + return points + + def get_union(pD,pG): + areaA = pD.area(); + areaB = pG.area(); + return areaA + areaB - get_intersection(pD, pG); + + def get_intersection_over_union(pD,pG): + try: + return get_intersection(pD, pG) / get_union(pD, pG); + except: + return 0 + + def get_intersection(pD,pG): + pInt = pD & pG + if len(pInt) == 0: + return 0 + return pInt.area() + + def compute_ap(confList, matchList,numGtCare): + correct = 0 + AP = 0 + if len(confList)>0: + confList = np.array(confList) + matchList = np.array(matchList) + sorted_ind = np.argsort(-confList) + confList = confList[sorted_ind] + matchList = matchList[sorted_ind] + for n in range(len(confList)): + match = matchList[n] + if match: + correct += 1 + AP += float(correct)/(n + 1) + + if numGtCare>0: + AP /= numGtCare + + return AP + + def transcription_match(transGt,transDet,specialCharacters=str(r'!?.:,*"()·[]/\''),onlyRemoveFirstLastCharacterGT=True): + + if onlyRemoveFirstLastCharacterGT: + #special characters in GT are allowed only at initial or final position + if (transGt==transDet): + return True + + if specialCharacters.find(transGt[0])>-1: + if transGt[1:]==transDet: + return True + + if specialCharacters.find(transGt[-1])>-1: + if transGt[0:len(transGt)-1]==transDet: + return True + + if specialCharacters.find(transGt[0])>-1 and specialCharacters.find(transGt[-1])>-1: + if transGt[1:len(transGt)-1]==transDet: + return True + return False + else: + #Special characters are removed from the begining and the end of both Detection and GroundTruth + while len(transGt)>0 and specialCharacters.find(transGt[0])>-1: + transGt = transGt[1:] + + while len(transDet)>0 and specialCharacters.find(transDet[0])>-1: + transDet = transDet[1:] + + while len(transGt)>0 and specialCharacters.find(transGt[-1])>-1 : + transGt = transGt[0:len(transGt)-1] + + while len(transDet)>0 and specialCharacters.find(transDet[-1])>-1: + transDet = transDet[0:len(transDet)-1] + + return transGt == transDet + + + def include_in_dictionary(transcription): + """ + Function used in Word Spotting that finds if the Ground Truth transcription meets the rules to enter into the dictionary. 
If not, the transcription will be cared as don't care + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = str("'!?.:,*\"()·[]/"); + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + if len(transcription) != len(transcription.replace(" ","")) : + return False; + + if len(transcription) < evaluationParams['MIN_LENGTH_CARE_WORD']: + return False; + + notAllowed = str("×÷·"); + + range1 = [ ord(u'a'), ord(u'z') ] + range2 = [ ord(u'A'), ord(u'Z') ] + range3 = [ ord(u'À'), ord(u'ƿ') ] + range4 = [ ord(u'DŽ'), ord(u'ɿ') ] + range5 = [ ord(u'Ά'), ord(u'Ͽ') ] + range6 = [ ord(u'-'), ord(u'-') ] + + for char in transcription : + charCode = ord(char) + if(notAllowed.find(char) != -1): + return False + + valid = ( charCode>=range1[0] and charCode<=range1[1] ) or ( charCode>=range2[0] and charCode<=range2[1] ) or ( charCode>=range3[0] and charCode<=range3[1] ) or ( charCode>=range4[0] and charCode<=range4[1] ) or ( charCode>=range5[0] and charCode<=range5[1] ) or ( charCode>=range6[0] and charCode<=range6[1] ) + if valid == False: + return False + + return True + + def include_in_dictionary_transcription(transcription): + """ + Function applied to the Ground Truth transcriptions used in Word Spotting. It removes special characters or terminations + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = str("'!?.:,*\"()·[]/"); + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + return transcription + + perSampleMetrics = {} + + matchedSum = 0 + det_only_matchedSum = 0 + + Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') + + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath,evaluationParams['GT_SAMPLE_NAME_2_ID']) + subm = rrc_evaluation_funcs.load_zip_file(submFilePath,evaluationParams['DET_SAMPLE_NAME_2_ID'],True) + + numGlobalCareGt = 0; + numGlobalCareDet = 0; + det_only_numGlobalCareGt = 0; + det_only_numGlobalCareDet = 0; + + arrGlobalConfidences = []; + arrGlobalMatches = []; + + for resFile in gt: + # print('resgt', resFile) + gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) + if (gtFile is None) : + raise Exception("The file %s is not UTF-8" %resFile) + + recall = 0 + precision = 0 + hmean = 0 + detCorrect = 0 + detOnlyCorrect = 0 + iouMat = np.empty([1,1]) + gtPols = [] + detPols = [] + gtTrans = [] + detTrans = [] + gtPolPoints = [] + detPolPoints = [] + gtDontCarePolsNum = [] #Array of Ground Truth Polygons' keys marked as don't Care + det_only_gtDontCarePolsNum = [] + detDontCarePolsNum = [] #Array of Detected Polygons' matched with a don't Care GT + det_only_detDontCarePolsNum = [] + detMatchedNums = [] + pairs = [] + + arrSampleConfidences = []; + arrSampleMatch = []; + sampleAP = 0; + + pointsList,_,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,False) + + for n in range(len(pointsList)): + points = pointsList[n] + transcription = 
transcriptionsList[n] + det_only_dontCare = dontCare = transcription == "###" # ctw1500 and total_text gt have been modified to the same format. + if evaluationParams['LTRB']: + gtRect = Rectangle(*points) + gtPol = rectangle_to_polygon(gtRect) + else: + gtPol = polygon_from_points(points) + gtPols.append(gtPol) + gtPolPoints.append(points) + + #On word spotting we will filter some transcriptions with special characters + if evaluationParams['WORD_SPOTTING'] : + if dontCare == False : + if include_in_dictionary(transcription) == False : + dontCare = True + else: + transcription = include_in_dictionary_transcription(transcription) + + gtTrans.append(transcription) + if dontCare: + gtDontCarePolsNum.append( len(gtPols)-1 ) + if det_only_dontCare: + det_only_gtDontCarePolsNum.append( len(gtPols)-1 ) + + + if resFile in subm: + + detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) + + pointsList,confidencesList,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents_det(detFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + for n in range(len(pointsList)): + points = pointsList[n] + transcription = transcriptionsList[n] + + if evaluationParams['LTRB']: + detRect = Rectangle(*points) + detPol = rectangle_to_polygon(detRect) + else: + detPol = polygon_from_points(points) + detPols.append(detPol) + detPolPoints.append(points) + detTrans.append(transcription) + + if len(gtDontCarePolsNum)>0 : + for dontCarePol in gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + detDontCarePolsNum.append( len(detPols)-1 ) + break + + if len(det_only_gtDontCarePolsNum)>0 : + for dontCarePol in det_only_gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + det_only_detDontCarePolsNum.append( len(detPols)-1 ) + break + + + if len(gtPols)>0 and len(detPols)>0: + #Calculate IoU and precision matrixs + outputShape=[len(gtPols),len(detPols)] + iouMat = np.empty(outputShape) + gtRectMat = np.zeros(len(gtPols),np.int8) + detRectMat = np.zeros(len(detPols),np.int8) + det_only_gtRectMat = np.zeros(len(gtPols),np.int8) + det_only_detRectMat = np.zeros(len(detPols),np.int8) + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + pG = gtPols[gtNum] + pD = detPols[detNum] + iouMat[gtNum,detNum] = get_intersection_over_union(pD,pG) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum : + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + gtRectMat[gtNum] = 1 + detRectMat[detNum] = 1 + #detection matched only if transcription is equal + # det_only_correct = True + # detOnlyCorrect += 1 + if evaluationParams['WORD_SPOTTING']: + edd = string_metric.levenshtein(gtTrans[gtNum].upper(), detTrans[detNum].upper()) + if edd<=0: + correct = True + else: + correct = False + # correct = gtTrans[gtNum].upper() == detTrans[detNum].upper() + else: + try: + correct = 
transcription_match(gtTrans[gtNum].upper(),detTrans[detNum].upper(),evaluationParams['SPECIAL_CHARACTERS'],evaluationParams['ONLY_REMOVE_FIRST_LAST_CHARACTER'])==True + except: # empty + correct = False + detCorrect += (1 if correct else 0) + if correct: + detMatchedNums.append(detNum) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if det_only_gtRectMat[gtNum] == 0 and det_only_detRectMat[detNum] == 0 and gtNum not in det_only_gtDontCarePolsNum and detNum not in det_only_detDontCarePolsNum: + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + det_only_gtRectMat[gtNum] = 1 + det_only_detRectMat[detNum] = 1 + #detection matched only if transcription is equal + det_only_correct = True + detOnlyCorrect += 1 + + + numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) + numDetCare = (len(detPols) - len(detDontCarePolsNum)) + det_only_numGtCare = (len(gtPols) - len(det_only_gtDontCarePolsNum)) + det_only_numDetCare = (len(detPols) - len(det_only_detDontCarePolsNum)) + if numGtCare == 0: + recall = float(1) + precision = float(0) if numDetCare >0 else float(1) + else: + recall = float(detCorrect) / numGtCare + precision = 0 if numDetCare==0 else float(detCorrect) / numDetCare + + if det_only_numGtCare == 0: + det_only_recall = float(1) + det_only_precision = float(0) if det_only_numDetCare >0 else float(1) + else: + det_only_recall = float(detOnlyCorrect) / det_only_numGtCare + det_only_precision = 0 if det_only_numDetCare==0 else float(detOnlyCorrect) / det_only_numDetCare + + + hmean = 0 if (precision + recall)==0 else 2.0 * precision * recall / (precision + recall) + det_only_hmean = 0 if (det_only_precision + det_only_recall)==0 else 2.0 * det_only_precision * det_only_recall / (det_only_precision + det_only_recall) + + matchedSum += detCorrect + det_only_matchedSum += detOnlyCorrect + numGlobalCareGt += numGtCare + numGlobalCareDet += numDetCare + det_only_numGlobalCareGt += det_only_numGtCare + det_only_numGlobalCareDet += det_only_numDetCare + + perSampleMetrics[resFile] = { + 'precision':precision, + 'recall':recall, + 'hmean':hmean, + 'iouMat':[] if len(detPols)>100 else iouMat.tolist(), + 'gtPolPoints':gtPolPoints, + 'detPolPoints':detPolPoints, + 'gtTrans':gtTrans, + 'detTrans':detTrans, + 'gtDontCare':gtDontCarePolsNum, + 'detDontCare':detDontCarePolsNum, + 'evaluationParams': evaluationParams, + } + + + methodRecall = 0 if numGlobalCareGt == 0 else float(matchedSum)/numGlobalCareGt + methodPrecision = 0 if numGlobalCareDet == 0 else float(matchedSum)/numGlobalCareDet + methodHmean = 0 if methodRecall + methodPrecision==0 else 2* methodRecall * methodPrecision / (methodRecall + methodPrecision) + + det_only_methodRecall = 0 if det_only_numGlobalCareGt == 0 else float(det_only_matchedSum)/det_only_numGlobalCareGt + det_only_methodPrecision = 0 if det_only_numGlobalCareDet == 0 else float(det_only_matchedSum)/det_only_numGlobalCareDet + det_only_methodHmean = 0 if det_only_methodRecall + det_only_methodPrecision==0 else 2* det_only_methodRecall * det_only_methodPrecision / (det_only_methodRecall + det_only_methodPrecision) + + + methodMetrics = r"E2E_RESULTS: precision: {}, recall: {}, hmean: {}".format(methodPrecision, methodRecall, methodHmean) + det_only_methodMetrics = r"DETECTION_ONLY_RESULTS: precision: {}, recall: {}, hmean: {}".format(det_only_methodPrecision, det_only_methodRecall, det_only_methodHmean) + + + resDict = {'calculated':True,'Message':'','e2e_method': methodMetrics,'det_only_method': det_only_methodMetrics,'per_sample': 
perSampleMetrics} + + + return resDict; + +def text_eval_main(det_file, gt_file, is_word_spotting): + global WORD_SPOTTING + WORD_SPOTTING = is_word_spotting + return rrc_evaluation_funcs.main_evaluation(None,det_file, gt_file, default_evaluation_params,validate_data,evaluate_method) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_eval_script_ic15.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_eval_script_ic15.py new file mode 100644 index 0000000000000000000000000000000000000000..a99bdd063a46fb58c55b6cc3022caa2b992bac42 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_eval_script_ic15.py @@ -0,0 +1,501 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# encoding=utf8 +from collections import namedtuple +from detectron2.evaluation import rrc_evaluation_funcs_ic15 as rrc_evaluation_funcs +import importlib +import sys + +import math + +from rapidfuzz import string_metric + +WORD_SPOTTING =True +def evaluation_imports(): + """ + evaluation_imports: Dictionary ( key = module name , value = alias ) with python modules used in the evaluation. + """ + return { + 'Polygon':'plg', + 'numpy':'np' + } + +def default_evaluation_params(): + """ + default_evaluation_params: Default parameters to use for the validation and evaluation. + """ + global WORD_SPOTTING + return { + 'IOU_CONSTRAINT' :0.5, + 'AREA_PRECISION_CONSTRAINT' :0.5, + 'WORD_SPOTTING' :WORD_SPOTTING, + 'MIN_LENGTH_CARE_WORD' :3, + 'GT_SAMPLE_NAME_2_ID':'gt_img_([0-9]+).txt', + 'DET_SAMPLE_NAME_2_ID':'res_img_([0-9]+).txt', + 'LTRB':False, #LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) + 'CRLF':False, # Lines are delimited by Windows CRLF format + 'CONFIDENCES':False, #Detections must include confidence value. MAP and MAR will be calculated, + 'SPECIAL_CHARACTERS':'!?.:,*"()·[]/\'', + 'ONLY_REMOVE_FIRST_LAST_CHARACTER' : True + } + +def validate_data(gtFilePath, submFilePath, evaluationParams): + """ + Method validate_data: validates that all files in the results folder are correct (have the correct name contents). + Validates also that there are no missing files in the folder. + If some error detected, the method raises the error + """ + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) + subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) + #Validate format of GroundTruth + for k in gt: + rrc_evaluation_funcs.validate_lines_in_file(k,gt[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True) + + #Validate format of results + for k in subm: + if (k in gt) == False : + raise Exception("The sample %s not present in GT" %k) + + rrc_evaluation_funcs.validate_lines_in_file(k,subm[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + +def evaluate_method(gtFilePath, submFilePath, evaluationParams): + """ + Method evaluate_method: evaluate method and returns the results + Results. Dictionary with the following values: + - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } + - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } + """ + for module,alias in evaluation_imports().items(): + globals()[alias] = importlib.import_module(module) + + def polygon_from_points(points,correctOffset=False): + """ + Returns a Polygon object to use with the Polygon2 class from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4 + """ + + if correctOffset: #this will substract 1 from the coordinates that correspond to the xmax and ymax + points[2] -= 1 + points[4] -= 1 + points[5] -= 1 + points[7] -= 1 + + resBoxes=np.empty([1,8],dtype='int32') + resBoxes[0,0]=int(points[0]) + resBoxes[0,4]=int(points[1]) + resBoxes[0,1]=int(points[2]) + resBoxes[0,5]=int(points[3]) + resBoxes[0,2]=int(points[4]) + resBoxes[0,6]=int(points[5]) + resBoxes[0,3]=int(points[6]) + resBoxes[0,7]=int(points[7]) + pointMat = resBoxes[0].reshape([2,4]).T + return plg.Polygon( pointMat) + + def rectangle_to_polygon(rect): + resBoxes=np.empty([1,8],dtype='int32') + resBoxes[0,0]=int(rect.xmin) + resBoxes[0,4]=int(rect.ymax) + resBoxes[0,1]=int(rect.xmin) + resBoxes[0,5]=int(rect.ymin) + resBoxes[0,2]=int(rect.xmax) + resBoxes[0,6]=int(rect.ymin) + resBoxes[0,3]=int(rect.xmax) + resBoxes[0,7]=int(rect.ymax) + + pointMat = resBoxes[0].reshape([2,4]).T + + return plg.Polygon( pointMat) + + def rectangle_to_points(rect): + points = [int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax), int(rect.xmax), int(rect.ymin), int(rect.xmin), int(rect.ymin)] + return points + + def get_union(pD,pG): + areaA = pD.area(); + areaB = pG.area(); + return areaA + areaB - get_intersection(pD, pG); + + def get_intersection_over_union(pD,pG): + try: + return get_intersection(pD, pG) / get_union(pD, pG); + except: + return 0 + + def get_intersection(pD,pG): + pInt = pD & pG + if len(pInt) == 0: + return 0 + return pInt.area() + + def compute_ap(confList, matchList,numGtCare): + correct = 0 + AP = 0 + if len(confList)>0: + confList = np.array(confList) + matchList = np.array(matchList) + sorted_ind = np.argsort(-confList) + confList = confList[sorted_ind] + matchList = matchList[sorted_ind] + for n in range(len(confList)): + match = matchList[n] + if match: + correct += 1 + AP += float(correct)/(n + 1) + + if numGtCare>0: + AP /= numGtCare + + return AP + + def transcription_match(transGt,transDet,specialCharacters='!?.:,*"()·[]/\'',onlyRemoveFirstLastCharacterGT=True): + + if onlyRemoveFirstLastCharacterGT: + #special characters in GT are allowed only at initial or final position + if (transGt==transDet): + return True + + if specialCharacters.find(transGt[0])>-1: + if transGt[1:]==transDet: + return True + + if specialCharacters.find(transGt[-1])>-1: + if transGt[0:len(transGt)-1]==transDet: + return True + + if specialCharacters.find(transGt[0])>-1 and specialCharacters.find(transGt[-1])>-1: + if transGt[1:len(transGt)-1]==transDet: + return True + return False + else: + #Special characters are removed from the begining and the end of both Detection and GroundTruth + while len(transGt)>0 and specialCharacters.find(transGt[0])>-1: + transGt = transGt[1:] + + while len(transDet)>0 and specialCharacters.find(transDet[0])>-1: + transDet = transDet[1:] + + while len(transGt)>0 and specialCharacters.find(transGt[-1])>-1 : + transGt = transGt[0:len(transGt)-1] + + while len(transDet)>0 and specialCharacters.find(transDet[-1])>-1: + transDet = transDet[0:len(transDet)-1] + + return transGt == transDet + + + def include_in_dictionary(transcription): + """ + Function used in Word Spotting 
that finds if the Ground Truth transcription meets the rules to enter into the dictionary. If not, the transcription will be cared as don't care + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = "'!?.:,*\"()·[]/"; + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + if len(transcription) != len(transcription.replace(" ","")) : + return False; + + if len(transcription) < evaluationParams['MIN_LENGTH_CARE_WORD']: + return False; + + notAllowed = "×÷·"; + + range1 = [ ord(u'a'), ord(u'z') ] + range2 = [ ord(u'A'), ord(u'Z') ] + range3 = [ ord(u'À'), ord(u'ƿ') ] + range4 = [ ord(u'DŽ'), ord(u'ɿ') ] + range5 = [ ord(u'Ά'), ord(u'Ͽ') ] + range6 = [ ord(u'-'), ord(u'-') ] + + for char in transcription : + charCode = ord(char) + if(notAllowed.find(char) != -1): + return False + + valid = ( charCode>=range1[0] and charCode<=range1[1] ) or ( charCode>=range2[0] and charCode<=range2[1] ) or ( charCode>=range3[0] and charCode<=range3[1] ) or ( charCode>=range4[0] and charCode<=range4[1] ) or ( charCode>=range5[0] and charCode<=range5[1] ) or ( charCode>=range6[0] and charCode<=range6[1] ) + if valid == False: + return False + + return True + + def include_in_dictionary_transcription(transcription): + """ + Function applied to the Ground Truth transcriptions used in Word Spotting. It removes special characters or terminations + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = "'!?.:,*\"()·[]/"; + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + return transcription + + perSampleMetrics = {} + + matchedSum = 0 + det_only_matchedSum = 0 + + Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') + + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath,evaluationParams['GT_SAMPLE_NAME_2_ID']) + subm = rrc_evaluation_funcs.load_zip_file(submFilePath,evaluationParams['DET_SAMPLE_NAME_2_ID'],True) + + numGlobalCareGt = 0; + numGlobalCareDet = 0; + det_only_numGlobalCareGt = 0; + det_only_numGlobalCareDet = 0; + + arrGlobalConfidences = []; + arrGlobalMatches = []; + + for resFile in gt: + + gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) + if (gtFile is None) : + raise Exception("The file %s is not UTF-8" %resFile) + + recall = 0 + precision = 0 + hmean = 0 + detCorrect = 0 + detOnlyCorrect = 0 + iouMat = np.empty([1,1]) + gtPols = [] + detPols = [] + gtTrans = [] + detTrans = [] + gtPolPoints = [] + detPolPoints = [] + gtDontCarePolsNum = [] #Array of Ground Truth Polygons' keys marked as don't Care + det_only_gtDontCarePolsNum = [] + detDontCarePolsNum = [] #Array of Detected Polygons' matched with a don't Care GT + det_only_detDontCarePolsNum = [] + detMatchedNums = [] + pairs = [] + + arrSampleConfidences = []; + arrSampleMatch = []; + sampleAP = 0; + + evaluationLog = "" + + pointsList,_,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,False) + for n in 
range(len(pointsList)): + points = pointsList[n] + transcription = transcriptionsList[n] + # dontCare = transcription == "###" + det_only_dontCare = dontCare = transcription == "###" # ctw1500 and total_text gt have been modified to the same format. + if evaluationParams['LTRB']: + gtRect = Rectangle(*points) + gtPol = rectangle_to_polygon(gtRect) + else: + gtPol = polygon_from_points(points) + gtPols.append(gtPol) + gtPolPoints.append(points) + + #On word spotting we will filter some transcriptions with special characters + if evaluationParams['WORD_SPOTTING'] : + if dontCare == False : + if include_in_dictionary(transcription) == False : + dontCare = True + else: + transcription = include_in_dictionary_transcription(transcription) + + gtTrans.append(transcription) + if dontCare: + gtDontCarePolsNum.append( len(gtPols)-1 ) + if det_only_dontCare: + det_only_gtDontCarePolsNum.append( len(gtPols)-1 ) + + evaluationLog += "GT polygons: " + str(len(gtPols)) + (" (" + str(len(gtDontCarePolsNum)) + " don't care)\n" if len(gtDontCarePolsNum)>0 else "\n") + + if resFile in subm: + + detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) + + pointsList,confidencesList,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(detFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + for n in range(len(pointsList)): + points = pointsList[n] + transcription = transcriptionsList[n] + + if evaluationParams['LTRB']: + detRect = Rectangle(*points) + detPol = rectangle_to_polygon(detRect) + else: + detPol = polygon_from_points(points) + detPols.append(detPol) + detPolPoints.append(points) + detTrans.append(transcription) + + if len(gtDontCarePolsNum)>0 : + for dontCarePol in gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + detDontCarePolsNum.append( len(detPols)-1 ) + break + + + if len(det_only_gtDontCarePolsNum)>0 : + for dontCarePol in det_only_gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + det_only_detDontCarePolsNum.append( len(detPols)-1 ) + break + + evaluationLog += "DET polygons: " + str(len(detPols)) + (" (" + str(len(detDontCarePolsNum)) + " don't care)\n" if len(detDontCarePolsNum)>0 else "\n") + + if len(gtPols)>0 and len(detPols)>0: + #Calculate IoU and precision matrixs + outputShape=[len(gtPols),len(detPols)] + iouMat = np.empty(outputShape) + gtRectMat = np.zeros(len(gtPols),np.int8) + detRectMat = np.zeros(len(detPols),np.int8) + det_only_gtRectMat = np.zeros(len(gtPols),np.int8) + det_only_detRectMat = np.zeros(len(detPols),np.int8) + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + pG = gtPols[gtNum] + pD = detPols[detNum] + iouMat[gtNum,detNum] = get_intersection_over_union(pD,pG) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum : + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + gtRectMat[gtNum] = 1 + detRectMat[detNum] = 1 + #detection matched only if 
transcription is equal + if evaluationParams['WORD_SPOTTING']: + correct = gtTrans[gtNum].upper() == detTrans[detNum].upper() + else: + correct = transcription_match(gtTrans[gtNum].upper(),detTrans[detNum].upper(),evaluationParams['SPECIAL_CHARACTERS'],evaluationParams['ONLY_REMOVE_FIRST_LAST_CHARACTER'])==True + detCorrect += (1 if correct else 0) + if correct: + detMatchedNums.append(detNum) + pairs.append({'gt':gtNum,'det':detNum,'correct':correct}) + evaluationLog += "Match GT #" + str(gtNum) + " with Det #" + str(detNum) + " trans. correct: " + str(correct) + "\n" + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if det_only_gtRectMat[gtNum] == 0 and det_only_detRectMat[detNum] == 0 and gtNum not in det_only_gtDontCarePolsNum and detNum not in det_only_detDontCarePolsNum: + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + det_only_gtRectMat[gtNum] = 1 + det_only_detRectMat[detNum] = 1 + #detection matched only if transcription is equal + det_only_correct = True + detOnlyCorrect += 1 + + if evaluationParams['CONFIDENCES']: + for detNum in range(len(detPols)): + if detNum not in detDontCarePolsNum : + #we exclude the don't care detections + match = detNum in detMatchedNums + + arrSampleConfidences.append(confidencesList[detNum]) + arrSampleMatch.append(match) + + arrGlobalConfidences.append(confidencesList[detNum]); + arrGlobalMatches.append(match); + + numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) + numDetCare = (len(detPols) - len(detDontCarePolsNum)) + det_only_numGtCare = (len(gtPols) - len(det_only_gtDontCarePolsNum)) + det_only_numDetCare = (len(detPols) - len(det_only_detDontCarePolsNum)) + if numGtCare == 0: + recall = float(1) + precision = float(0) if numDetCare >0 else float(1) + sampleAP = precision + else: + recall = float(detCorrect) / numGtCare + precision = 0 if numDetCare==0 else float(detCorrect) / numDetCare + if evaluationParams['CONFIDENCES']: + sampleAP = compute_ap(arrSampleConfidences, arrSampleMatch, numGtCare ) + + if det_only_numGtCare == 0: + det_only_recall = float(1) + det_only_precision = float(0) if det_only_numDetCare >0 else float(1) + else: + det_only_recall = float(detOnlyCorrect) / det_only_numGtCare + det_only_precision = 0 if det_only_numDetCare==0 else float(detOnlyCorrect) / det_only_numDetCare + + hmean = 0 if (precision + recall)==0 else 2.0 * precision * recall / (precision + recall) + det_only_hmean = 0 if (det_only_precision + det_only_recall)==0 else 2.0 * det_only_precision * det_only_recall / (det_only_precision + det_only_recall) + + matchedSum += detCorrect + det_only_matchedSum += detOnlyCorrect + numGlobalCareGt += numGtCare + numGlobalCareDet += numDetCare + det_only_numGlobalCareGt += det_only_numGtCare + det_only_numGlobalCareDet += det_only_numDetCare + + perSampleMetrics[resFile] = { + 'precision':precision, + 'recall':recall, + 'hmean':hmean, + 'pairs':pairs, + 'AP':sampleAP, + 'iouMat':[] if len(detPols)>100 else iouMat.tolist(), + 'gtPolPoints':gtPolPoints, + 'detPolPoints':detPolPoints, + 'gtTrans':gtTrans, + 'detTrans':detTrans, + 'gtDontCare':gtDontCarePolsNum, + 'detDontCare':detDontCarePolsNum, + 'evaluationParams': evaluationParams, + 'evaluationLog': evaluationLog + } + + # Compute AP + AP = 0 + if evaluationParams['CONFIDENCES']: + AP = compute_ap(arrGlobalConfidences, arrGlobalMatches, numGlobalCareGt) + + methodRecall = 0 if numGlobalCareGt == 0 else float(matchedSum)/numGlobalCareGt + methodPrecision = 0 if numGlobalCareDet == 0 else float(matchedSum)/numGlobalCareDet + 
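+ # Worked example for the guarded metrics above and below (illustrative numbers only):
+ # matchedSum=80, numGlobalCareGt=100, numGlobalCareDet=90 -> recall=0.80, precision~=0.889,
+ # hmean = 2*0.889*0.80/(0.889+0.80) ~= 0.842; the zero checks keep the script from dividing
+ # by zero when a split has no care-worthy ground truth or no care-worthy detections.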
methodHmean = 0 if methodRecall + methodPrecision==0 else 2* methodRecall * methodPrecision / (methodRecall + methodPrecision) + + det_only_methodRecall = 0 if det_only_numGlobalCareGt == 0 else float(det_only_matchedSum)/det_only_numGlobalCareGt + det_only_methodPrecision = 0 if det_only_numGlobalCareDet == 0 else float(det_only_matchedSum)/det_only_numGlobalCareDet + det_only_methodHmean = 0 if det_only_methodRecall + det_only_methodPrecision==0 else 2* det_only_methodRecall * det_only_methodPrecision / (det_only_methodRecall + det_only_methodPrecision) + + methodMetrics = r"E2E_RESULTS: precision: {}, recall: {}, hmean: {}".format(methodPrecision, methodRecall, methodHmean) + det_only_methodMetrics = r"DETECTION_ONLY_RESULTS: precision: {}, recall: {}, hmean: {}".format(det_only_methodPrecision, det_only_methodRecall, det_only_methodHmean) + + resDict = {'calculated':True,'Message':'','e2e_method': methodMetrics, 'det_only_method': det_only_methodMetrics, 'per_sample': perSampleMetrics} + + + return resDict; + + + +def text_eval_main_ic15(det_file, gt_file, is_word_spotting): + global WORD_SPOTTING + WORD_SPOTTING = is_word_spotting + p = { + 'g': gt_file, + 's': det_file + } + return rrc_evaluation_funcs.main_evaluation(p,default_evaluation_params,validate_data,evaluate_method) \ No newline at end of file diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_evaluation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..8a44925fdd7b55ff2867381985e87c99d2cd6a59 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/evaluation/text_evaluation.py @@ -0,0 +1,684 @@ +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import re +import torch +from collections import OrderedDict +from fvcore.common.file_io import PathManager +from pycocotools.coco import COCO + +from detectron2.utils import comm +from detectron2.data import MetadataCatalog +from detectron2.evaluation.evaluator import DatasetEvaluator + +import glob +import shutil +from shapely.geometry import Polygon, LinearRing +from detectron2.evaluation import text_eval_script +from detectron2.evaluation import text_eval_script_ic15 +import zipfile +import pickle +import cv2 +import editdistance +class TextEvaluator(DatasetEvaluator): + """ + Evaluate text proposals and recognition. + """ + + def __init__(self, dataset_name, cfg, distributed, output_dir=None): + self._tasks = ("polygon", "recognition") + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + raise AttributeError( + f"json_file was not found in MetaDataCatalog for '{dataset_name}'." 
+ ) + + CTLABELS = [" ","!",'"',"#","$","%","&","'","(",")","*","+",",","-",".","/","0","1","2","3","4","5","6","7","8","9",":",";","<","=",">","?","@","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","[","\\","]","^","_","`","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","{","|","}","~","ˋ","ˊ","﹒","ˀ","˜","ˇ","ˆ","˒","‑",'´', "~"] + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + self.dataset_name = dataset_name + # use dataset_name to decide eval_gt_path + self.lexicon_type = 3 + if "totaltext" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_totaltext.zip" + self._word_spotting = True + self.dataset_name = "totaltext" + elif "ctw1500" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_ctw1500.zip" + self._word_spotting = False + self.dataset_name = "ctw1500" + elif "icdar2015" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_icdar2015.zip" + self._word_spotting = False + self.dataset_name = "icdar2015" + elif "vintext" in dataset_name: + self.lexicon_type = None + self._text_eval_gt_path = "datasets/evaluation/gt_fimotext.zip" + self._word_spotting = True + elif "custom" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_custom.zip" + self._word_spotting = False + self._text_eval_confidence = cfg.TEST.INFERENCE_TH_TEST + self.nms_enable = cfg.TEST.USE_NMS_IN_TSET + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = self.instances_to_coco_json(instances, input) + self._predictions.append(prediction) + + def to_eval_format(self, file_path, temp_dir="temp_det_results", cf_th=0.5): + def fis_ascii(s): + a = (ord(c) < 128 for c in s) + return all(a) + + def de_ascii(s): + a = [c for c in s if ord(c) < 128] + outa = '' + for i in a: + outa +=i + return outa + + with open(file_path, 'r') as f: + data = json.load(f) + with open('temp_all_det_cors.txt', 'w') as f2: + for ix in range(len(data)): + if data[ix]['score'] > 0.1: + outstr = '{}: '.format(data[ix]['image_id']) + xmin = 1000000 + ymin = 1000000 + xmax = 0 + ymax = 0 + for i in range(len(data[ix]['polys'])): + outstr = outstr + str(int(data[ix]['polys'][i][0])) +','+str(int(data[ix]['polys'][i][1])) +',' + if not "vintext" in self.dataset_name: + ass = de_ascii(data[ix]['rec']) + else: + ass = data[ix]['rec'] + if len(ass)>=0: # + outstr = outstr + str(round(data[ix]['score'], 3)) +',####'+ass+'\n' + f2.writelines(outstr) + f2.close() + dirn = temp_dir + lsc = [cf_th] + fres = open('temp_all_det_cors.txt', 'r').readlines() + for isc in lsc: + if not os.path.isdir(dirn): + os.mkdir(dirn) + + for line in fres: + line = line.strip() + s = line.split(': ') + filename = '{:07d}.txt'.format(int(s[0])) + outName = os.path.join(dirn, filename) + with open(outName, 'a') as fout: + ptr = s[1].strip().split(',####') + score = ptr[0].split(',')[-1] + if float(score) < isc: + continue + cors = ','.join(e for e in ptr[0].split(',')[:-1]) + fout.writelines(cors+',####'+ptr[1]+'\n') + os.remove("temp_all_det_cors.txt") + + def sort_detection(self, temp_dir): + origin_file = temp_dir + output_file = "final_"+temp_dir + output_file_full = 
"full_final_"+temp_dir + if not os.path.isdir(output_file_full): + os.mkdir(output_file_full) + if not os.path.isdir(output_file): + os.mkdir(output_file) + files = glob.glob(origin_file+'*.txt') + files.sort() + if "totaltext" in self.dataset_name: + if not self.lexicon_type == None: + lexicon_path = 'datasets/totaltext/weak_voc_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/totaltext/weak_voc_pair_list.txt', 'r') + pairs = dict() + for line in pair_list.readlines(): + line=line.strip() + word = line.split(' ')[0].upper() + word_gt = line[len(word)+1:] + pairs[word] = word_gt + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + elif "ctw1500" in self.dataset_name: + if not self.lexicon_type == None: + lexicon_path = 'datasets/CTW1500/weak_voc_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/CTW1500/weak_voc_pair_list.txt', 'r') + pairs = dict() + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + pairs[line.upper()] = line + elif "icdar2015" in self.dataset_name: + if self.lexicon_type==1: + # generic lexicon + lexicon_path = 'datasets/icdar2015/GenericVocabulary_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/icdar2015/GenericVocabulary_pair_list.txt', 'r') + pairs = dict() + for line in pair_list.readlines(): + line=line.strip() + word = line.split(' ')[0].upper() + word_gt = line[len(word)+1:] + pairs[word] = word_gt + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + if self.lexicon_type==2: + # weak lexicon + lexicon_path = 'datasets/icdar2015/ch4_test_vocabulary_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/icdar2015/ch4_test_vocabulary_pair_list.txt', 'r') + pairs = dict() + for line in pair_list.readlines(): + line=line.strip() + word = line.split(' ')[0].upper() + word_gt = line[len(word)+1:] + pairs[word] = word_gt + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + + def find_match_word(rec_str, pairs, lexicon=None): + rec_str = rec_str.upper() + dist_min = 100 + dist_min_pre = 100 + match_word = '' + match_dist = 100 + for word in lexicon: + word = word.upper() + ed = editdistance.eval(rec_str, word) + length_dist = abs(len(word) - len(rec_str)) + dist = ed + if dist 0.3: + if not len(mask.polygons): + continue + if self.nms_enable: + if i not in keep: + i = i+1 + continue + poly = polys[i] + if 'icdar2015' in self.dataset_name: + poly = polygon2rbox(poly, height, width) + poly = np.array(poly) + rec_string = self.decode(rec) + if not len(rec_string): + i = i+1 + continue + result = { + "image_id": img_id, + "category_id": 1, + "polys": poly.tolist(), + "rec": rec_string, + "score": score, + } + results.append(result) + i = i+1 + return results + + def decode(self, rec): + CTLABELS = [" ","!",'"',"#","$","%","&","'","(",")","*","+",",","-",".","/","0","1","2","3","4","5","6","7","8","9",":",";","<","=",">","?","@","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","[","\\","]","^","_","`","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","{","|","}","~","ˋ","ˊ","﹒","ˀ","˜","ˇ","ˆ","˒","‑",'´', "~"] + s = '' + tmp = [] + for i in 
range(len(rec)-1): + if i == 0: + tmp.append(rec[i]) + else: + if rec[i] != rec[i-1]: + tmp.append(rec[i]) + for c in tmp: + c = int(c) + if 0 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = np.array(pts).reshape(-1,2) + pts = pts.tolist() + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. 
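+ # The list comprehension below keeps only contours with at least 3 vertices
+ # (6 flattened coordinates); shorter contours cannot form a valid polygon.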
+ res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + +dictionary = "aàáạảãâầấậẩẫăằắặẳẵAÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪeèéẹẻẽêềếệểễEÈÉẸẺẼÊỀẾỆỂỄoòóọỏõôồốộổỗơờớợởỡOÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠiìíịỉĩIÌÍỊỈĨuùúụủũưừứựửữƯỪỨỰỬỮUÙÚỤỦŨyỳýỵỷỹYỲÝỴỶỸ" + + +def make_groups(): + groups = [] + i = 0 + while i < len(dictionary) - 5: + group = [c for c in dictionary[i : i + 6]] + i += 6 + groups.append(group) + return groups + + +groups = make_groups() + +TONES = ["", "ˋ", "ˊ", "﹒", "ˀ", "˜"] +SOURCES = ["ă", "â", "Ă", "Â", "ê", "Ê", "ô", "ơ", "Ô", "Ơ", "ư", "Ư", "Đ", "đ"] +TARGETS = ["aˇ", "aˆ", "Aˇ", "Aˆ", "eˆ", "Eˆ", "oˆ", "o˒", "Oˆ", "O˒", "u˒", "U˒", "D-", "d‑"] + + +def correct_tone_position(word): + word = word[:-1] + if len(word) < 2: + pass + first_ord_char = "" + second_order_char = "" + for char in word: + for group in groups: + if char in group: + second_order_char = first_ord_char + first_ord_char = group[0] + if word[-1] == first_ord_char and second_order_char != "": + pair_chars = ["qu", "Qu", "qU", "QU", "gi", "Gi", "gI", "GI"] + for pair in pair_chars: + if pair in word and second_order_char in ["u", "U", "i", "I"]: + return first_ord_char + return second_order_char + return first_ord_char + + +def vintext_decoder(recognition): + for char in TARGETS: + recognition = recognition.replace(char, SOURCES[TARGETS.index(char)]) + if len(recognition) < 1: + return recognition + if recognition[-1] in TONES: + if len(recognition) < 2: + return recognition + replace_char = correct_tone_position(recognition) + tone = recognition[-1] + recognition = recognition[:-1] + for group in groups: + if replace_char in group: + recognition = recognition.replace(replace_char, group[TONES.index(tone)]) + return recognition diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..78c27d64fa42760eeacd14d241cf28d58e3da490 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +from .api import * +from .flatten import TracingAdapter +from .torchscript import scripting_with_instances, dump_torchscript_IR + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/api.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/api.py new file mode 100644 index 0000000000000000000000000000000000000000..e80989231ea5233e40f48a76e375a5a3c39208b1 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/api.py @@ -0,0 +1,273 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
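The `vintext_decoder` helper added to text_evaluation.py above reverses the recognizer's tone-mark encoding: composed targets such as `eˆ` are mapped back to `ê`, and a trailing tone symbol is re-attached to the correct vowel via `correct_tone_position`. A minimal usage sketch, assuming the module is importable under the path shown (the sample string is illustrative):

```python
# Hypothetical import; the decoder is defined at module level in
# detectron2/evaluation/text_evaluation.py from the diff above.
from detectron2.evaluation.text_evaluation import vintext_decoder

# "eˆ" encodes the circumflex and the trailing "﹒" encodes the dot-below tone,
# so the decoder should reassemble the accented word.
print(vintext_decoder("Vieˆt﹒"))  # expected: "Việt"
```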
+import copy +import logging +import os +import torch +from caffe2.proto import caffe2_pb2 +from torch import nn + +from detectron2.config import CfgNode +from detectron2.utils.file_io import PathManager + +from .caffe2_inference import ProtobufDetectionModel +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph + +__all__ = [ + "add_export_config", + "export_caffe2_model", + "Caffe2Model", + "export_onnx_model", + "Caffe2Tracer", +] + + +def add_export_config(cfg): + """ + Add options needed by caffe2 export. + + Args: + cfg (CfgNode): a detectron2 config + + Returns: + CfgNode: + an updated config with new options that will be used by :class:`Caffe2Tracer`. + """ + is_frozen = cfg.is_frozen() + cfg.defrost() + cfg.EXPORT_CAFFE2 = CfgNode() + cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT = False + if is_frozen: + cfg.freeze() + return cfg + + +class Caffe2Tracer: + """ + Make a detectron2 model traceable with Caffe2 operators. + This class creates a traceable version of a detectron2 model which: + + 1. Rewrite parts of the model using ops in Caffe2. Note that some ops do + not have GPU implementation in Caffe2. + 2. Remove post-processing and only produce raw layer outputs + + After making a traceable model, the class provide methods to export such a + model to different deployment formats. + Exported graph produced by this class take two input tensors: + + 1. (1, C, H, W) float "data" which is an image (usually in [0, 255]). + (H, W) often has to be padded to multiple of 32 (depend on the model + architecture). + 2. 1x3 float "im_info", each row of which is (height, width, 1.0). + Height and width are true image shapes before padding. + + The class currently only supports models using builtin meta architectures. + Batch inference is not supported, and contributions are welcome. + """ + + def __init__(self, cfg: CfgNode, model: nn.Module, inputs): + """ + Args: + cfg (CfgNode): a detectron2 config, with extra export-related options + added by :func:`add_export_config`. It's used to construct + caffe2-compatible model. + model (nn.Module): An original pytorch model. Must be among a few official models + in detectron2 that can be converted to become caffe2-compatible automatically. + Weights have to be already loaded to this model. + inputs: sample inputs that the given model takes for inference. + Will be used to trace the model. For most models, random inputs with + no detected objects will not work as they lead to wrong traces. + """ + assert isinstance(cfg, CfgNode), cfg + assert isinstance(model, torch.nn.Module), type(model) + + if "EXPORT_CAFFE2" not in cfg: + cfg = add_export_config(cfg) # will just the defaults + # TODO make it support custom models, by passing in c2 model directly + C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE] + self.traceable_model = C2MetaArch(cfg, copy.deepcopy(model)) + self.inputs = inputs + self.traceable_inputs = self.traceable_model.get_caffe2_inputs(inputs) + + def export_caffe2(self): + """ + Export the model to Caffe2's protobuf format. + The returned object can be saved with its :meth:`.save_protobuf()` method. + The result can be loaded and executed using Caffe2 runtime. 
+ + Returns: + :class:`Caffe2Model` + """ + from .caffe2_export import export_caffe2_detection_model + + predict_net, init_net = export_caffe2_detection_model( + self.traceable_model, self.traceable_inputs + ) + return Caffe2Model(predict_net, init_net) + + def export_onnx(self): + """ + Export the model to ONNX format. + Note that the exported model contains custom ops only available in caffe2, therefore it + cannot be directly executed by other runtime (such as onnxruntime or TensorRT). + Post-processing or transformation passes may be applied on the model to accommodate + different runtimes, but we currently do not provide support for them. + + Returns: + onnx.ModelProto: an onnx model. + """ + from .caffe2_export import export_onnx_model as export_onnx_model_impl + + return export_onnx_model_impl(self.traceable_model, (self.traceable_inputs,)) + + def export_torchscript(self): + """ + Export the model to a ``torch.jit.TracedModule`` by tracing. + The returned object can be saved to a file by ``.save()``. + + Returns: + torch.jit.TracedModule: a torch TracedModule + """ + logger = logging.getLogger(__name__) + logger.info("Tracing the model with torch.jit.trace ...") + with torch.no_grad(): + return torch.jit.trace(self.traceable_model, (self.traceable_inputs,)) + + +class Caffe2Model(nn.Module): + """ + A wrapper around the traced model in Caffe2's protobuf format. + The exported graph has different inputs/outputs from the original Pytorch + model, as explained in :class:`Caffe2Tracer`. This class wraps around the + exported graph to simulate the same interface as the original Pytorch model. + It also provides functions to save/load models in Caffe2's format.' + + Examples: + :: + c2_model = Caffe2Tracer(cfg, torch_model, inputs).export_caffe2() + inputs = [{"image": img_tensor_CHW}] + outputs = c2_model(inputs) + orig_outputs = torch_model(inputs) + """ + + def __init__(self, predict_net, init_net): + super().__init__() + self.eval() # always in eval mode + self._predict_net = predict_net + self._init_net = init_net + self._predictor = None + + __init__.__HIDE_SPHINX_DOC__ = True + + @property + def predict_net(self): + """ + caffe2.core.Net: the underlying caffe2 predict net + """ + return self._predict_net + + @property + def init_net(self): + """ + caffe2.core.Net: the underlying caffe2 init net + """ + return self._init_net + + def save_protobuf(self, output_dir): + """ + Save the model as caffe2's protobuf format. + It saves the following files: + + * "model.pb": definition of the graph. Can be visualized with + tools like `netron `_. + * "model_init.pb": model parameters + * "model.pbtxt": human-readable definition of the graph. Not + needed for deployment. + + Args: + output_dir (str): the output directory to save protobuf files. + """ + logger = logging.getLogger(__name__) + logger.info("Saving model to {} ...".format(output_dir)) + if not PathManager.exists(output_dir): + PathManager.mkdirs(output_dir) + + with PathManager.open(os.path.join(output_dir, "model.pb"), "wb") as f: + f.write(self._predict_net.SerializeToString()) + with PathManager.open(os.path.join(output_dir, "model.pbtxt"), "w") as f: + f.write(str(self._predict_net)) + with PathManager.open(os.path.join(output_dir, "model_init.pb"), "wb") as f: + f.write(self._init_net.SerializeToString()) + + def save_graph(self, output_file, inputs=None): + """ + Save the graph as SVG format. + + Args: + output_file (str): a SVG file + inputs: optional inputs given to the model. 
+ If given, the inputs will be used to run the graph to record + shape of every tensor. The shape information will be + saved together with the graph. + """ + from .caffe2_export import run_and_save_graph + + if inputs is None: + save_graph(self._predict_net, output_file, op_only=False) + else: + size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0) + device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii") + inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device) + inputs = [x.cpu().numpy() for x in inputs] + run_and_save_graph(self._predict_net, self._init_net, inputs, output_file) + + @staticmethod + def load_protobuf(dir): + """ + Args: + dir (str): a directory used to save Caffe2Model with + :meth:`save_protobuf`. + The files "model.pb" and "model_init.pb" are needed. + + Returns: + Caffe2Model: the caffe2 model loaded from this directory. + """ + predict_net = caffe2_pb2.NetDef() + with PathManager.open(os.path.join(dir, "model.pb"), "rb") as f: + predict_net.ParseFromString(f.read()) + + init_net = caffe2_pb2.NetDef() + with PathManager.open(os.path.join(dir, "model_init.pb"), "rb") as f: + init_net.ParseFromString(f.read()) + + return Caffe2Model(predict_net, init_net) + + def __call__(self, inputs): + """ + An interface that wraps around a Caffe2 model and mimics detectron2's models' + input/output format. See details about the format at :doc:`/tutorials/models`. + This is used to compare the outputs of caffe2 model with its original torch model. + + Due to the extra conversion between Pytorch/Caffe2, this method is not meant for + benchmark. Because of the conversion, this method also has dependency + on detectron2 in order to convert to detectron2's output format. + """ + if self._predictor is None: + self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net) + return self._predictor(inputs) + + +def export_caffe2_model(cfg, model, inputs): + logger = logging.getLogger(__name__) + logger.warning( + "export_caffe2_model() is deprecated. Please use `Caffe2Tracer().export_caffe2() instead." + ) + return Caffe2Tracer(cfg, model, inputs).export_caffe2() + + +def export_onnx_model(cfg, model, inputs): + logger = logging.getLogger(__name__) + logger.warning( + "export_caffe2_model() is deprecated. Please use `Caffe2Tracer().export_onnx() instead." + ) + return Caffe2Tracer(cfg, model, inputs).export_onnx() diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/c10.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/c10.py new file mode 100644 index 0000000000000000000000000000000000000000..d8c52d45964fdbffa52648439f6b82c6b8b3c219 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/c10.py @@ -0,0 +1,527 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import math +import torch +import torch.nn.functional as F + +from detectron2.layers import cat +from detectron2.layers.roi_align_rotated import ROIAlignRotated +from detectron2.modeling import poolers +from detectron2.modeling.proposal_generator import rpn +from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference +from detectron2.structures import Boxes, ImageList, Instances, Keypoints + +from .shared import alias, to_device + + +""" +This file contains caffe2-compatible implementation of several detectron2 components. 
+""" + + +class Caffe2Boxes(Boxes): + """ + Representing a list of detectron2.structures.Boxes from minibatch, each box + is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector + (batch index + 5 coordinates) for RotatedBoxes. + """ + + def __init__(self, tensor): + assert isinstance(tensor, torch.Tensor) + assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size() + # TODO: make tensor immutable when dim is Nx5 for Boxes, + # and Nx6 for RotatedBoxes? + self.tensor = tensor + + +# TODO clean up this class, maybe just extend Instances +class InstancesList(object): + """ + Tensor representation of a list of Instances object for a batch of images. + + When dealing with a batch of images with Caffe2 ops, a list of bboxes + (instances) are usually represented by single Tensor with size + (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is + for providing common functions to convert between these two representations. + """ + + def __init__(self, im_info, indices, extra_fields=None): + # [N, 3] -> (H, W, Scale) + self.im_info = im_info + # [N,] -> indice of batch to which the instance belongs + self.indices = indices + # [N, ...] + self.batch_extra_fields = extra_fields or {} + + self.image_size = self.im_info + + def get_fields(self): + """like `get_fields` in the Instances object, + but return each field in tensor representations""" + ret = {} + for k, v in self.batch_extra_fields.items(): + # if isinstance(v, torch.Tensor): + # tensor_rep = v + # elif isinstance(v, (Boxes, Keypoints)): + # tensor_rep = v.tensor + # else: + # raise ValueError("Can't find tensor representation for: {}".format()) + ret[k] = v + return ret + + def has(self, name): + return name in self.batch_extra_fields + + def set(self, name, value): + data_len = len(value) + if len(self.batch_extra_fields): + assert ( + len(self) == data_len + ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) + self.batch_extra_fields[name] = value + + def __setattr__(self, name, val): + if name in ["im_info", "indices", "batch_extra_fields", "image_size"]: + super().__setattr__(name, val) + else: + self.set(name, val) + + def __getattr__(self, name): + if name not in self.batch_extra_fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self.batch_extra_fields[name] + + def __len__(self): + return len(self.indices) + + def flatten(self): + ret = [] + for _, v in self.batch_extra_fields.items(): + if isinstance(v, (Boxes, Keypoints)): + ret.append(v.tensor) + else: + ret.append(v) + return ret + + @staticmethod + def to_d2_instances_list(instances_list): + """ + Convert InstancesList to List[Instances]. The input `instances_list` can + also be a List[Instances], in this case this method is a non-op. 
+ """ + if not isinstance(instances_list, InstancesList): + assert all(isinstance(x, Instances) for x in instances_list) + return instances_list + + ret = [] + for i, info in enumerate(instances_list.im_info): + instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())])) + + ids = instances_list.indices == i + for k, v in instances_list.batch_extra_fields.items(): + if isinstance(v, torch.Tensor): + instances.set(k, v[ids]) + continue + elif isinstance(v, Boxes): + instances.set(k, v[ids, -4:]) + continue + + target_type, tensor_source = v + assert isinstance(tensor_source, torch.Tensor) + assert tensor_source.shape[0] == instances_list.indices.shape[0] + tensor_source = tensor_source[ids] + + if issubclass(target_type, Boxes): + instances.set(k, Boxes(tensor_source[:, -4:])) + elif issubclass(target_type, Keypoints): + instances.set(k, Keypoints(tensor_source)) + elif issubclass(target_type, torch.Tensor): + instances.set(k, tensor_source) + else: + raise ValueError("Can't handle targe type: {}".format(target_type)) + + ret.append(instances) + return ret + + +class Caffe2Compatible(object): + """ + A model can inherit this class to indicate that it can be traced and deployed with caffe2. + """ + + def _get_tensor_mode(self): + return self._tensor_mode + + def _set_tensor_mode(self, v): + self._tensor_mode = v + + tensor_mode = property(_get_tensor_mode, _set_tensor_mode) + """ + If true, the model expects C2-style tensor only inputs/outputs format. + """ + + +class Caffe2RPN(Caffe2Compatible, rpn.RPN): + def _generate_proposals( + self, images, objectness_logits_pred, anchor_deltas_pred, gt_instances=None + ): + assert isinstance(images, ImageList) + if self.tensor_mode: + im_info = images.image_sizes + else: + im_info = torch.tensor([[im_sz[0], im_sz[1], 1.0] for im_sz in images.image_sizes]).to( + images.tensor.device + ) + assert isinstance(im_info, torch.Tensor) + + rpn_rois_list = [] + rpn_roi_probs_list = [] + for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip( + objectness_logits_pred, + anchor_deltas_pred, + iter(self.anchor_generator.cell_anchors), + self.anchor_generator.strides, + ): + scores = scores.detach() + bbox_deltas = bbox_deltas.detach() + + rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals( + scores, + bbox_deltas, + im_info, + cell_anchors_tensor, + spatial_scale=1.0 / feat_stride, + pre_nms_topN=self.pre_nms_topk[self.training], + post_nms_topN=self.post_nms_topk[self.training], + nms_thresh=self.nms_thresh, + min_size=self.min_box_size, + # correct_transform_coords=True, # deprecated argument + angle_bound_on=True, # Default + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, # Default + legacy_plus_one=False, + ) + rpn_rois_list.append(rpn_rois) + rpn_roi_probs_list.append(rpn_roi_probs) + + # For FPN in D2, in RPN all proposals from different levels are concated + # together, ranked and picked by top post_nms_topk. Then in ROIPooler + # it calculates level_assignments and calls the RoIAlign from + # the corresponding level. + + if len(objectness_logits_pred) == 1: + rpn_rois = rpn_rois_list[0] + rpn_roi_probs = rpn_roi_probs_list[0] + else: + assert len(rpn_rois_list) == len(rpn_roi_probs_list) + rpn_post_nms_topN = self.post_nms_topk[self.training] + + device = rpn_rois_list[0].device + input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)] + + # TODO remove this after confirming rpn_max_level/rpn_min_level + # is not needed in CollectRpnProposals. 
+ feature_strides = list(self.anchor_generator.strides) + rpn_min_level = int(math.log2(feature_strides[0])) + rpn_max_level = int(math.log2(feature_strides[-1])) + assert (rpn_max_level - rpn_min_level + 1) == len( + rpn_rois_list + ), "CollectRpnProposals requires continuous levels" + + rpn_rois = torch.ops._caffe2.CollectRpnProposals( + input_list, + # NOTE: in current implementation, rpn_max_level and rpn_min_level + # are not needed, only the subtraction of two matters and it + # can be infer from the number of inputs. Keep them now for + # consistency. + rpn_max_level=2 + len(rpn_rois_list) - 1, + rpn_min_level=2, + rpn_post_nms_topN=rpn_post_nms_topN, + ) + rpn_rois = to_device(rpn_rois, device) + rpn_roi_probs = [] + + proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode) + return proposals, {} + + def forward(self, images, features, gt_instances=None): + assert not self.training + features = [features[f] for f in self.in_features] + objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features) + return self._generate_proposals( + images, + objectness_logits_pred, + anchor_deltas_pred, + gt_instances, + ) + + @staticmethod + def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode): + proposals = InstancesList( + im_info=im_info, + indices=rpn_rois[:, 0], + extra_fields={ + "proposal_boxes": Caffe2Boxes(rpn_rois), + "objectness_logits": (torch.Tensor, rpn_roi_probs), + }, + ) + if not tensor_mode: + proposals = InstancesList.to_d2_instances_list(proposals) + else: + proposals = [proposals] + return proposals + + +class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler): + @staticmethod + def c2_preprocess(box_lists): + assert all(isinstance(x, Boxes) for x in box_lists) + if all(isinstance(x, Caffe2Boxes) for x in box_lists): + # input is pure-tensor based + assert len(box_lists) == 1 + pooler_fmt_boxes = box_lists[0].tensor + else: + pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists) + return pooler_fmt_boxes + + def forward(self, x, box_lists): + assert not self.training + + pooler_fmt_boxes = self.c2_preprocess(box_lists) + num_level_assignments = len(self.level_poolers) + + if num_level_assignments == 1: + if isinstance(self.level_poolers[0], ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = torch.ops._caffe2.RoIAlign + aligned = self.level_poolers[0].aligned + + out = c2_roi_align( + x[0], + pooler_fmt_boxes, + order="NCHW", + spatial_scale=float(self.level_poolers[0].spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(self.level_poolers[0].sampling_ratio), + aligned=aligned, + ) + return out + + device = pooler_fmt_boxes.device + assert ( + self.max_level - self.min_level + 1 == 4 + ), "Currently DistributeFpnProposals only support 4 levels" + fpn_outputs = torch.ops._caffe2.DistributeFpnProposals( + to_device(pooler_fmt_boxes, "cpu"), + roi_canonical_scale=self.canonical_box_size, + roi_canonical_level=self.canonical_level, + roi_max_level=self.max_level, + roi_min_level=self.min_level, + legacy_plus_one=False, + ) + fpn_outputs = [to_device(x, device) for x in fpn_outputs] + + rois_fpn_list = fpn_outputs[:-1] + rois_idx_restore_int32 = fpn_outputs[-1] + + roi_feat_fpn_list = [] + for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers): + if isinstance(pooler, ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = 
torch.ops._caffe2.RoIAlign + aligned = bool(pooler.aligned) + + roi_feat_fpn = c2_roi_align( + x_level, + roi_fpn, + order="NCHW", + spatial_scale=float(pooler.spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(pooler.sampling_ratio), + aligned=aligned, + ) + roi_feat_fpn_list.append(roi_feat_fpn) + + roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0) + assert roi_feat_shuffled.numel() > 0 and rois_idx_restore_int32.numel() > 0, ( + "Caffe2 export requires tracing with a model checkpoint + input that can produce valid" + " detections. But no detections were obtained with the given checkpoint and input!" + ) + roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32) + return roi_feat + + +class Caffe2FastRCNNOutputsInference: + def __init__(self, tensor_mode): + self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode + + def __call__(self, box_predictor, predictions, proposals): + """ equivalent to FastRCNNOutputLayers.inference """ + num_classes = box_predictor.num_classes + score_thresh = box_predictor.test_score_thresh + nms_thresh = box_predictor.test_nms_thresh + topk_per_image = box_predictor.test_topk_per_image + is_rotated = len(box_predictor.box2box_transform.weights) == 5 + + if is_rotated: + box_dim = 5 + assert box_predictor.box2box_transform.weights[4] == 1, ( + "The weights for Rotated BBoxTransform in C2 have only 4 dimensions," + + " thus enforcing the angle weight to be 1 for now" + ) + box2box_transform_weights = box_predictor.box2box_transform.weights[:4] + else: + box_dim = 4 + box2box_transform_weights = box_predictor.box2box_transform.weights + + class_logits, box_regression = predictions + if num_classes + 1 == class_logits.shape[1]: + class_prob = F.softmax(class_logits, -1) + else: + assert num_classes == class_logits.shape[1] + class_prob = F.sigmoid(class_logits) + # BoxWithNMSLimit will infer num_classes from the shape of the class_prob + # So append a zero column as placeholder for the background class + class_prob = torch.cat((class_prob, torch.zeros(class_prob.shape[0], 1)), dim=1) + + assert box_regression.shape[1] % box_dim == 0 + cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1 + + input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1 + + rois = type(proposals[0].proposal_boxes).cat([p.proposal_boxes for p in proposals]) + device, dtype = rois.tensor.device, rois.tensor.dtype + if input_tensor_mode: + im_info = proposals[0].image_size + rois = rois.tensor + else: + im_info = torch.tensor( + [[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]] + ) + batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(len(p) for p in proposals) + ], + dim=0, + ) + rois = torch.cat([batch_ids, rois.tensor], dim=1) + + roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform( + to_device(rois, "cpu"), + to_device(box_regression, "cpu"), + to_device(im_info, "cpu"), + weights=box2box_transform_weights, + apply_scale=True, + rotated=is_rotated, + angle_bound_on=True, + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, + legacy_plus_one=False, + ) + roi_pred_bbox = to_device(roi_pred_bbox, device) + roi_batch_splits = to_device(roi_batch_splits, device) + + nms_outputs = torch.ops._caffe2.BoxWithNMSLimit( + to_device(class_prob, "cpu"), + to_device(roi_pred_bbox, "cpu"), + to_device(roi_batch_splits, "cpu"), + score_thresh=float(score_thresh), + 
nms=float(nms_thresh), + detections_per_im=int(topk_per_image), + soft_nms_enabled=False, + soft_nms_method="linear", + soft_nms_sigma=0.5, + soft_nms_min_score_thres=0.001, + rotated=is_rotated, + cls_agnostic_bbox_reg=cls_agnostic_bbox_reg, + input_boxes_include_bg_cls=False, + output_classes_include_bg_cls=False, + legacy_plus_one=False, + ) + roi_score_nms = to_device(nms_outputs[0], device) + roi_bbox_nms = to_device(nms_outputs[1], device) + roi_class_nms = to_device(nms_outputs[2], device) + roi_batch_splits_nms = to_device(nms_outputs[3], device) + roi_keeps_nms = to_device(nms_outputs[4], device) + roi_keeps_size_nms = to_device(nms_outputs[5], device) + if not self.tensor_mode: + roi_class_nms = roi_class_nms.to(torch.int64) + + roi_batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms) + ], + dim=0, + ) + + roi_class_nms = alias(roi_class_nms, "class_nms") + roi_score_nms = alias(roi_score_nms, "score_nms") + roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms") + roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms") + roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms") + roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms") + + results = InstancesList( + im_info=im_info, + indices=roi_batch_ids[:, 0], + extra_fields={ + "pred_boxes": Caffe2Boxes(roi_bbox_nms), + "scores": roi_score_nms, + "pred_classes": roi_class_nms, + }, + ) + + if not self.tensor_mode: + results = InstancesList.to_d2_instances_list(results) + batch_splits = roi_batch_splits_nms.int().tolist() + kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits)) + else: + results = [results] + kept_indices = [roi_keeps_nms] + + return results, kept_indices + + +class Caffe2MaskRCNNInference: + def __call__(self, pred_mask_logits, pred_instances): + """ equivalent to mask_head.mask_rcnn_inference """ + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + mask_probs_pred = pred_mask_logits.sigmoid() + mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs") + pred_instances[0].pred_masks = mask_probs_pred + else: + mask_rcnn_inference(pred_mask_logits, pred_instances) + + +class Caffe2KeypointRCNNInference: + def __init__(self, use_heatmap_max_keypoint): + self.use_heatmap_max_keypoint = use_heatmap_max_keypoint + + def __call__(self, pred_keypoint_logits, pred_instances): + # just return the keypoint heatmap for now, + # there will be option to call HeatmapMaxKeypointOp + output = alias(pred_keypoint_logits, "kps_score") + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + if self.use_heatmap_max_keypoint: + device = output.device + output = torch.ops._caffe2.HeatmapMaxKeypoint( + to_device(output, "cpu"), + pred_instances[0].pred_boxes.tensor, + should_output_softmax=True, # worth make it configerable? + ) + output = to_device(output, device) + output = alias(output, "keypoints_out") + pred_instances[0].pred_keypoints = output + return pred_keypoint_logits diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_export.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_export.py new file mode 100644 index 0000000000000000000000000000000000000000..74ac123a7aed6cd77d6d833446a831d9048745b2 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_export.py @@ -0,0 +1,207 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
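# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] The roi_batch_ids / batch_ids constructions in c10.py above turn
#  a list of per-image detection counts (BoxWithNMSLimit's batch_splits) into a
#  column of image indices that gets prepended to the boxes.  With hypothetical
#  counts:
# ---------------------------------------------------------------------------
import torch

batch_splits = [3, 1]  # 3 detections for image 0, 1 detection for image 1
batch_ids = torch.cat(
    [torch.full((b, 1), i, dtype=torch.float32) for i, b in enumerate(batch_splits)],
    dim=0,
)
# batch_ids == [[0.], [0.], [0.], [1.]]; torch.cat([batch_ids, boxes], dim=1)
# then yields the caffe2-style (batch_index, x1, y1, x2, y2) RoI format.
assert batch_ids.shape == (4, 1)
# ------------------------- end of editor's sketch ---------------------------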
+ +import copy +import io +import logging +import numpy as np +from typing import List +import onnx +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python.onnx.backend import Caffe2Backend +from tabulate import tabulate +from termcolor import colored +from torch.onnx import OperatorExportTypes + +from .shared import ( + ScopedWS, + construct_init_net_from_params, + fuse_alias_placeholder, + fuse_copy_between_cpu_and_gpu, + get_params_from_init_net, + group_norm_replace_aten_with_caffe2, + infer_device_type, + remove_dead_end_ops, + remove_reshape_for_fc, + save_graph, +) + +logger = logging.getLogger(__name__) + + +def export_onnx_model(model, inputs): + """ + Trace and export a model to onnx format. + + Args: + model (nn.Module): + inputs (tuple[args]): the model will be called by `model(*inputs)` + + Returns: + an onnx model + """ + assert isinstance(model, torch.nn.Module) + + # make sure all modules are in eval mode, onnx may change the training state + # of the module if the states are not consistent + def _check_eval(module): + assert not module.training + + model.apply(_check_eval) + + # Export the model to ONNX + with torch.no_grad(): + with io.BytesIO() as f: + torch.onnx.export( + model, + inputs, + f, + operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK, + # verbose=True, # NOTE: uncomment this for debugging + # export_params=True, + ) + onnx_model = onnx.load_from_string(f.getvalue()) + + # Apply ONNX's Optimization + all_passes = onnx.optimizer.get_available_passes() + passes = ["fuse_bn_into_conv"] + assert all(p in all_passes for p in passes) + onnx_model = onnx.optimizer.optimize(onnx_model, passes) + return onnx_model + + +def _op_stats(net_def): + type_count = {} + for t in [op.type for op in net_def.op]: + type_count[t] = type_count.get(t, 0) + 1 + type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet + type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count + return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list) + + +def _assign_device_option( + predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor] +): + """ + ONNX exported network doesn't have concept of device, assign necessary + device option for each op in order to make it runable on GPU runtime. 
+ """ + + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + def _assign_op_device_option(net_proto, net_ssa, blob_device_types): + for op, ssa_i in zip(net_proto.op, net_ssa): + if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]: + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + else: + devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]] + assert all(d == devices[0] for d in devices) + if devices[0] == "cuda": + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + + # update ops in predict_net + predict_net_input_device_types = { + (name, 0): _get_device_type(tensor) + for name, tensor in zip(predict_net.external_input, tensor_inputs) + } + predict_net_device_types = infer_device_type( + predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch" + ) + predict_net_ssa, _ = core.get_ssa(predict_net) + _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types) + + # update ops in init_net + init_net_ssa, versions = core.get_ssa(init_net) + init_net_output_device_types = { + (name, versions[name]): predict_net_device_types[(name, 0)] + for name in init_net.external_output + } + init_net_device_types = infer_device_type( + init_net, known_status=init_net_output_device_types, device_name_style="pytorch" + ) + _assign_op_device_option(init_net, init_net_ssa, init_net_device_types) + + +def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]): + """ + Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX. + + Arg: + model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py + tensor_inputs: a list of tensors that caffe2 model takes as input. + """ + model = copy.deepcopy(model) + assert isinstance(model, torch.nn.Module) + assert hasattr(model, "encode_additional_info") + + # Export via ONNX + logger.info( + "Exporting a {} model via ONNX ...".format(type(model).__name__) + + " Some warnings from ONNX are expected and are usually not to worry about." + ) + onnx_model = export_onnx_model(model, (tensor_inputs,)) + # Convert ONNX model to Caffe2 protobuf + init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) + ops_table = [[op.type, op.input, op.output] for op in predict_net.op] + table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe") + logger.info( + "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan") + ) + + # Apply protobuf optimization + fuse_alias_placeholder(predict_net, init_net) + if any(t.device.type != "cpu" for t in tensor_inputs): + fuse_copy_between_cpu_and_gpu(predict_net) + remove_dead_end_ops(init_net) + _assign_device_option(predict_net, init_net, tensor_inputs) + params, device_options = get_params_from_init_net(init_net) + predict_net, params = remove_reshape_for_fc(predict_net, params) + init_net = construct_init_net_from_params(params, device_options) + group_norm_replace_aten_with_caffe2(predict_net) + + # Record necessary information for running the pb model in Detectron2 system. 
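# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] export_onnx_model above serializes the traced graph into an
#  in-memory buffer rather than a file.  A stripped-down version of that
#  pattern on a toy module (assuming the classic TorchScript-based
#  torch.onnx.export path used here; the toy model and input shape are made up):
# ---------------------------------------------------------------------------
import io

import onnx
import torch
from torch import nn

toy = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU()).eval()
dummy = torch.randn(1, 3, 32, 32)

with torch.no_grad(), io.BytesIO() as f:
    torch.onnx.export(toy, (dummy,), f)
    onnx_model = onnx.load_from_string(f.getvalue())

print("exported graph has", len(onnx_model.graph.node), "ONNX nodes")
# ------------------------- end of editor's sketch ---------------------------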
+ model.encode_additional_info(predict_net, init_net) + + logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net))) + logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net))) + + return predict_net, init_net + + +def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path): + """ + Run the caffe2 model on given inputs, recording the shape and draw the graph. + + predict_net/init_net: caffe2 model. + tensor_inputs: a list of tensors that caffe2 model takes as input. + graph_save_path: path for saving graph of exported model. + """ + + logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False) + + # Run the exported Caffe2 net + logger.info("Running ONNX exported model ...") + with ScopedWS("__ws_tmp__", True) as ws: + ws.RunNetOnce(init_net) + initialized_blobs = set(ws.Blobs()) + uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs] + for name, blob in zip(uninitialized, tensor_inputs): + ws.FeedBlob(name, blob) + + try: + ws.RunNetOnce(predict_net) + except RuntimeError as e: + logger.warning("Encountered RuntimeError: \n{}".format(str(e))) + + ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()} + blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)} + + logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes) + + return ws_blobs diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_inference.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..deb886c0417285ed1d5ad85eb941fa1ac757cdab --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_inference.py @@ -0,0 +1,161 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +from itertools import count +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core + +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type + +logger = logging.getLogger(__name__) + + +# ===== ref: mobile-vision predictor's 'Caffe2Wrapper' class ====== +class ProtobufModel(torch.nn.Module): + """ + Wrapper of a caffe2's protobuf model. + It works just like nn.Module, but running caffe2 under the hood. + Input/Output are tuple[tensor] that match the caffe2 net's external_input/output. 
+ """ + + _ids = count(0) + + def __init__(self, predict_net, init_net): + logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...") + super().__init__() + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + # create unique temporary workspace for each instance + self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids)) + self.net = core.Net(predict_net) + + logger.info("Running init_net once to fill the parameters ...") + with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: + ws.RunNetOnce(init_net) + uninitialized_external_input = [] + for blob in self.net.Proto().external_input: + if blob not in ws.Blobs(): + uninitialized_external_input.append(blob) + ws.CreateBlob(blob) + ws.CreateNet(self.net) + + self._error_msgs = set() + self._input_blobs = uninitialized_external_input + + def _infer_output_devices(self, inputs): + """ + Returns: + list[str]: list of device for each external output + """ + + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + predict_net = self.net.Proto() + input_device_types = { + (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs) + } + device_type_map = infer_device_type( + predict_net, known_status=input_device_types, device_name_style="pytorch" + ) + ssa, versions = core.get_ssa(predict_net) + versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] + output_devices = [device_type_map[outp] for outp in versioned_outputs] + return output_devices + + def forward(self, inputs): + """ + Args: + inputs (tuple[torch.Tensor]) + + Returns: + tuple[torch.Tensor] + """ + assert len(inputs) == len(self._input_blobs), ( + f"Length of inputs ({len(inputs)}) " + f"doesn't match the required input blobs: {self._input_blobs}" + ) + + with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: + for b, tensor in zip(self._input_blobs, inputs): + ws.FeedBlob(b, tensor) + + try: + ws.RunNet(self.net.Proto().name) + except RuntimeError as e: + if not str(e) in self._error_msgs: + self._error_msgs.add(str(e)) + logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) + logger.warning("Catch the error and use partial results.") + + c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output] + # Remove outputs of current run, this is necessary in order to + # prevent fetching the result from previous run if the model fails + # in the middle. + for b in self.net.Proto().external_output: + # Needs to create uninitialized blob to make the net runable. + # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), + # but there'no such API. 
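# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] ProtobufModel.__init__ above gives every instance its own
#  isolated caffe2 workspace by drawing names from a class-level
#  itertools.count.  The naming trick in isolation (class name is hypothetical):
# ---------------------------------------------------------------------------
from itertools import count


class Workspaced:
    _ids = count(0)  # shared by all instances -> strictly increasing ids

    def __init__(self):
        self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids))


a, b = Workspaced(), Workspaced()
assert a.ws_name != b.ws_name  # each instance gets a distinct workspace name
# ------------------------- end of editor's sketch ---------------------------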
+ ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).") + + # Cast output to torch.Tensor on the desired device + output_devices = ( + self._infer_output_devices(inputs) + if any(t.device.type != "cpu" for t in inputs) + else ["cpu" for _ in self.net.Proto().external_output] + ) + + outputs = [] + for name, c2_output, device in zip( + self.net.Proto().external_output, c2_outputs, output_devices + ): + if not isinstance(c2_output, np.ndarray): + raise RuntimeError( + "Invalid output for blob {}, received: {}".format(name, c2_output) + ) + outputs.append(torch.tensor(c2_output).to(device=device)) + return tuple(outputs) + + +class ProtobufDetectionModel(torch.nn.Module): + """ + A class works just like a pytorch meta arch in terms of inference, but running + caffe2 model under the hood. + """ + + def __init__(self, predict_net, init_net, *, convert_outputs=None): + """ + Args: + predict_net, init_net (core.Net): caffe2 nets + convert_outptus (callable): a function that converts caffe2 + outputs to the same format of the original pytorch model. + By default, use the one defined in the caffe2 meta_arch. + """ + super().__init__() + self.protobuf_model = ProtobufModel(predict_net, init_net) + self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) + self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") + + if convert_outputs is None: + meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") + meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] + self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) + else: + self._convert_outputs = convert_outputs + + def _convert_inputs(self, batched_inputs): + # currently all models convert inputs in the same way + return convert_batched_inputs_to_c2_format( + batched_inputs, self.size_divisibility, self.device + ) + + def forward(self, batched_inputs): + c2_inputs = self._convert_inputs(batched_inputs) + c2_results = self.protobuf_model(c2_inputs) + c2_results = dict(zip(self.protobuf_model.net.Proto().external_output, c2_results)) + return self._convert_outputs(batched_inputs, c2_inputs, c2_results) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_modeling.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_modeling.py new file mode 100644 index 0000000000000000000000000000000000000000..7a9fc78164c32f6709245d3a456af19ffde7c497 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_modeling.py @@ -0,0 +1,503 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
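# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] Several wrappers in this file pass image metadata as an Nx3
#  "im_info" tensor of (height, width, scale) rows; convert_batched_inputs_to_c2_format
#  below builds it from the unpadded image sizes and the requested output
#  height.  With made-up sizes:
# ---------------------------------------------------------------------------
import torch

image_sizes = [(480, 640), (600, 800)]   # (H, W) after resizing, before padding
target_heights = [720, 600]              # original heights requested by the caller

im_info = torch.tensor(
    [[h, w, th / h] for (h, w), th in zip(image_sizes, target_heights)]
)
# -> tensor([[480., 640., 1.5000],
#            [600., 800., 1.0000]])
# ------------------------- end of editor's sketch ---------------------------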
+ +import functools +import io +import struct +import types +import torch + +from detectron2.modeling import meta_arch +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.meta_arch.panoptic_fpn import combine_semantic_and_instance_outputs +from detectron2.modeling.meta_arch.retinanet import permute_to_N_HWA_K +from detectron2.modeling.postprocessing import detector_postprocess, sem_seg_postprocess +from detectron2.modeling.roi_heads import keypoint_head +from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes + +from .c10 import Caffe2Compatible +from .caffe2_patch import ROIHeadsPatcher, patch_generalized_rcnn +from .shared import ( + alias, + check_set_pb_arg, + get_pb_arg_floats, + get_pb_arg_valf, + get_pb_arg_vali, + get_pb_arg_vals, + mock_torch_nn_functional_interpolate, +) + + +def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): + """ + A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor]) + to detectron2's format (i.e. list of Instances instance). + This only works when the model follows the Caffe2 detectron's naming convention. + + Args: + image_sizes (List[List[int, int]]): [H, W] of every image. + tensor_outputs (Dict[str, Tensor]): external_output to its tensor. + + force_mask_on (Bool): if true, the it make sure there'll be pred_masks even + if the mask is not found from tensor_outputs (usually due to model crash) + """ + + results = [Instances(image_size) for image_size in image_sizes] + + batch_splits = tensor_outputs.get("batch_splits", None) + if batch_splits: + raise NotImplementedError() + assert len(image_sizes) == 1 + result = results[0] + + bbox_nms = tensor_outputs["bbox_nms"] + score_nms = tensor_outputs["score_nms"] + class_nms = tensor_outputs["class_nms"] + # Detection will always success because Conv support 0-batch + assert bbox_nms is not None + assert score_nms is not None + assert class_nms is not None + if bbox_nms.shape[1] == 5: + result.pred_boxes = RotatedBoxes(bbox_nms) + else: + result.pred_boxes = Boxes(bbox_nms) + result.scores = score_nms + result.pred_classes = class_nms.to(torch.int64) + + mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) + if mask_fcn_probs is not None: + # finish the mask pred + mask_probs_pred = mask_fcn_probs + num_masks = mask_probs_pred.shape[0] + class_pred = result.pred_classes + indices = torch.arange(num_masks, device=class_pred.device) + mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] + result.pred_masks = mask_probs_pred + elif force_mask_on: + # NOTE: there's no way to know the height/width of mask here, it won't be + # used anyway when batch size is 0, so just set them to 0. + result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) + + keypoints_out = tensor_outputs.get("keypoints_out", None) + kps_score = tensor_outputs.get("kps_score", None) + if keypoints_out is not None: + # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) + keypoints_tensor = keypoints_out + # NOTE: it's possible that prob is not calculated if "should_output_softmax" + # is set to False in HeatmapMaxKeypoint, so just using raw score, seems + # it doesn't affect mAP. TODO: check more carefully. 
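# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] The mask_fcn_probs handling above keeps, for every detection,
#  only the mask channel of its predicted class via paired advanced indexing.
#  With toy shapes (3 detections, 5 classes, 4x4 masks):
# ---------------------------------------------------------------------------
import torch

mask_probs_pred = torch.rand(3, 5, 4, 4)      # per-class mask probabilities
class_pred = torch.tensor([2, 0, 4])          # predicted class per detection
indices = torch.arange(3)

per_class_masks = mask_probs_pred[indices, class_pred][:, None]
assert per_class_masks.shape == (3, 1, 4, 4)  # one mask per detection, N x 1 x H x W
# ------------------------- end of editor's sketch ---------------------------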
+ keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] + result.pred_keypoints = keypoint_xyp + elif kps_score is not None: + # keypoint heatmap to sparse data structure + pred_keypoint_logits = kps_score + keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) + + return results + + +def _cast_to_f32(f64): + return struct.unpack("f", struct.pack("f", f64))[0] + + +def set_caffe2_compatible_tensor_mode(model, enable=True): + def _fn(m): + if isinstance(m, Caffe2Compatible): + m.tensor_mode = enable + + model.apply(_fn) + + +def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device): + """ + See get_caffe2_inputs() below. + """ + assert all(isinstance(x, dict) for x in batched_inputs) + assert all(x["image"].dim() == 3 for x in batched_inputs) + + images = [x["image"] for x in batched_inputs] + images = ImageList.from_tensors(images, size_divisibility) + + im_info = [] + for input_per_image, image_size in zip(batched_inputs, images.image_sizes): + target_height = input_per_image.get("height", image_size[0]) + target_width = input_per_image.get("width", image_size[1]) # noqa + # NOTE: The scale inside im_info is kept as convention and for providing + # post-processing information if further processing is needed. For + # current Caffe2 model definitions that don't include post-processing inside + # the model, this number is not used. + # NOTE: There can be a slight difference between width and height + # scales, using a single number can results in numerical difference + # compared with D2's post-processing. + scale = target_height / image_size[0] + im_info.append([image_size[0], image_size[1], scale]) + im_info = torch.Tensor(im_info) + + return images.tensor.to(device), im_info.to(device) + + +class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module): + """ + Base class for caffe2-compatible implementation of a meta architecture. + The forward is traceable and its traced graph can be converted to caffe2 + graph through ONNX. + """ + + def __init__(self, cfg, torch_model): + """ + Args: + cfg (CfgNode): + torch_model (nn.Module): the detectron2 model (meta_arch) to be + converted. + """ + super().__init__() + self._wrapped_model = torch_model + self.eval() + set_caffe2_compatible_tensor_mode(self, True) + + def get_caffe2_inputs(self, batched_inputs): + """ + Convert pytorch-style structured inputs to caffe2-style inputs that + are tuples of tensors. + + Args: + batched_inputs (list[dict]): inputs to a detectron2 model + in its standard format. Each dict has "image" (CHW tensor), and optionally + "height" and "width". + + Returns: + tuple[Tensor]: + tuple of tensors that will be the inputs to the + :meth:`forward` method. For existing models, the first + is an NCHW tensor (padded and batched); the second is + a im_info Nx3 tensor, where the rows are + (height, width, unused legacy parameter) + """ + return convert_batched_inputs_to_c2_format( + batched_inputs, + self._wrapped_model.backbone.size_divisibility, + self._wrapped_model.device, + ) + + def encode_additional_info(self, predict_net, init_net): + """ + Save extra metadata that will be used by inference in the output protobuf. + """ + pass + + def forward(self, inputs): + """ + Run the forward in caffe2-style. It has to use caffe2-compatible ops + and the method will be used for tracing. + + Args: + inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`. + They will be the inputs of the converted caffe2 graph. + + Returns: + tuple[Tensor]: output tensors. 
They will be the outputs of the + converted caffe2 graph. + """ + raise NotImplementedError + + def _caffe2_preprocess_image(self, inputs): + """ + Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward. + It normalizes the input images, and the final caffe2 graph assumes the + inputs have been batched already. + """ + data, im_info = inputs + data = alias(data, "data") + im_info = alias(im_info, "im_info") + mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std + normalized_data = (data - mean) / std + normalized_data = alias(normalized_data, "normalized_data") + + # Pack (data, im_info) into ImageList which is recognized by self.inference. + images = ImageList(tensor=normalized_data, image_sizes=im_info) + return images + + @staticmethod + def get_outputs_converter(predict_net, init_net): + """ + Creates a function that converts outputs of the caffe2 model to + detectron2's standard format. + The function uses information in `predict_net` and `init_net` that are + available at inferene time. Therefore the function logic can be used in inference. + + The returned function has the following signature: + + def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs + + Where + + * batched_inputs (list[dict]): the original input format of the meta arch + * c2_inputs (tuple[Tensor]): the caffe2 inputs. + * c2_results (dict[str, Tensor]): the caffe2 output format, + corresponding to the outputs of the :meth:`forward` function. + * detectron2_outputs: the original output format of the meta arch. + + This function can be used to compare the outputs of the original meta arch and + the converted caffe2 graph. + + Returns: + callable: a callable of the above signature. + """ + raise NotImplementedError + + +class Caffe2GeneralizedRCNN(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.GeneralizedRCNN) + torch_model = patch_generalized_rcnn(torch_model) + super().__init__(cfg, torch_model) + + self.roi_heads_patcher = ROIHeadsPatcher( + self._wrapped_model.roi_heads, cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT + ) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN") + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + if not self.tensor_mode: + return self._wrapped_model.inference(inputs) + images = self._caffe2_preprocess_image(inputs) + features = self._wrapped_model.backbone(images.tensor) + proposals, _ = self._wrapped_model.proposal_generator(images, features) + with self.roi_heads_patcher.mock_roi_heads(): + detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) + return tuple(detector_results[0].flatten()) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + results = assemble_rcnn_outputs_by_name(image_sizes, c2_results) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +class Caffe2PanopticFPN(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, 
meta_arch.PanopticFPN) + torch_model = patch_generalized_rcnn(torch_model) + super().__init__(cfg, torch_model) + + self.roi_heads_patcher = ROIHeadsPatcher( + self._wrapped_model.roi_heads, cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT + ) + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + assert self.tensor_mode + images = self._caffe2_preprocess_image(inputs) + features = self._wrapped_model.backbone(images.tensor) + + sem_seg_results, _ = self._wrapped_model.sem_seg_head(features) + sem_seg_results = alias(sem_seg_results, "sem_seg") + + proposals, _ = self._wrapped_model.proposal_generator(images, features) + + with self.roi_heads_patcher.mock_roi_heads(self.tensor_mode): + detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) + + return tuple(detector_results[0].flatten()) + (sem_seg_results,) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"PanopticFPN") + + # Inference parameters: + check_set_pb_arg( + predict_net, + "combine_overlap_threshold", + "f", + _cast_to_f32(self._wrapped_model.combine_overlap_thresh), + ) + check_set_pb_arg( + predict_net, + "combine_stuff_area_limit", + "i", + self._wrapped_model.combine_stuff_area_thresh, + ) + check_set_pb_arg( + predict_net, + "combine_instances_confidence_threshold", + "f", + _cast_to_f32(self._wrapped_model.combine_instances_score_thresh), + ) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + combine_overlap_threshold = get_pb_arg_valf(predict_net, "combine_overlap_threshold", None) + combine_stuff_area_limit = get_pb_arg_vali(predict_net, "combine_stuff_area_limit", None) + combine_instances_confidence_threshold = get_pb_arg_valf( + predict_net, "combine_instances_confidence_threshold", None + ) + + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + detector_results = assemble_rcnn_outputs_by_name( + image_sizes, c2_results, force_mask_on=True + ) + sem_seg_results = c2_results["sem_seg"] + + # copied from meta_arch/panoptic_fpn.py ... 
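# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] The thresholds written into predict_net above go through
#  _cast_to_f32, which rounds a Python double to the nearest float32 by packing
#  and unpacking 4 bytes, so the stored protobuf argument matches what caffe2
#  will actually read back.  In isolation:
# ---------------------------------------------------------------------------
import struct


def cast_to_f32(f64: float) -> float:
    return struct.unpack("f", struct.pack("f", f64))[0]


print(cast_to_f32(0.1))  # 0.10000000149011612 -- the float32-rounded value
# ------------------------- end of editor's sketch ---------------------------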
+ processed_results = [] + for sem_seg_result, detector_result, input_per_image, image_size in zip( + sem_seg_results, detector_results, batched_inputs, image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) + detector_r = detector_postprocess(detector_result, height, width) + + processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) + + panoptic_r = combine_semantic_and_instance_outputs( + detector_r, + sem_seg_r.argmax(dim=0), + combine_overlap_threshold, + combine_stuff_area_limit, + combine_instances_confidence_threshold, + ) + processed_results[-1]["panoptic_seg"] = panoptic_r + return processed_results + + return f + + +class Caffe2RetinaNet(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.RetinaNet) + super().__init__(cfg, torch_model) + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + assert self.tensor_mode + images = self._caffe2_preprocess_image(inputs) + + # explicitly return the images sizes to avoid removing "im_info" by ONNX + # since it's not used in the forward path + return_tensors = [images.image_sizes] + + features = self._wrapped_model.backbone(images.tensor) + features = [features[f] for f in self._wrapped_model.head_in_features] + for i, feature_i in enumerate(features): + features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True) + return_tensors.append(features[i]) + + pred_logits, pred_anchor_deltas = self._wrapped_model.head(features) + for i, (box_cls_i, box_delta_i) in enumerate(zip(pred_logits, pred_anchor_deltas)): + return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i))) + return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i))) + + return tuple(return_tensors) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet") + + # Inference parameters: + check_set_pb_arg( + predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.test_score_thresh) + ) + check_set_pb_arg( + predict_net, "topk_candidates", "i", self._wrapped_model.test_topk_candidates + ) + check_set_pb_arg( + predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.test_nms_thresh) + ) + check_set_pb_arg( + predict_net, + "max_detections_per_image", + "i", + self._wrapped_model.max_detections_per_image, + ) + + check_set_pb_arg( + predict_net, + "bbox_reg_weights", + "floats", + [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights], + ) + self._encode_anchor_generator_cfg(predict_net) + + def _encode_anchor_generator_cfg(self, predict_net): + # serialize anchor_generator for future use + serialized_anchor_generator = io.BytesIO() + torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator) + # Ideally we can put anchor generating inside the model, then we don't + # need to store this information. 
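# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] _encode_anchor_generator_cfg serializes a whole module into an
#  in-memory buffer so its bytes can be stored as a string argument of
#  predict_net and deserialized again in get_outputs_converter.  The same
#  round-trip on a plain, made-up payload:
# ---------------------------------------------------------------------------
import io

import torch

payload = {"sizes": [32, 64, 128], "aspect_ratios": [0.5, 1.0, 2.0]}
buf = io.BytesIO()
torch.save(payload, buf)
raw = buf.getvalue()                  # bytes suitable for a protobuf "s" argument
restored = torch.load(io.BytesIO(raw))
assert restored == payload
# ------------------------- end of editor's sketch ---------------------------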
+ bytes = serialized_anchor_generator.getvalue() + check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + self = types.SimpleNamespace() + serialized_anchor_generator = io.BytesIO( + get_pb_arg_vals(predict_net, "serialized_anchor_generator", None) + ) + self.anchor_generator = torch.load(serialized_anchor_generator) + bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None) + self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights)) + self.test_score_thresh = get_pb_arg_valf(predict_net, "score_threshold", None) + self.test_topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None) + self.test_nms_thresh = get_pb_arg_valf(predict_net, "nms_threshold", None) + self.max_detections_per_image = get_pb_arg_vali( + predict_net, "max_detections_per_image", None + ) + + # hack to reuse inference code from RetinaNet + self.inference = functools.partial(meta_arch.RetinaNet.inference, self) + self.inference_single_image = functools.partial( + meta_arch.RetinaNet.inference_single_image, self + ) + + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + + num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")]) + pred_logits = [c2_results["box_cls_{}".format(i)] for i in range(num_features)] + pred_anchor_deltas = [c2_results["box_delta_{}".format(i)] for i in range(num_features)] + + # For each feature level, feature should have the same batch size and + # spatial dimension as the box_cls and box_delta. + dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits] + anchors = self.anchor_generator(dummy_features) + + # self.num_classess can be inferred + self.num_classes = pred_logits[0].shape[1] // (pred_anchor_deltas[0].shape[1] // 4) + + pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] + pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas] + + results = self.inference(anchors, pred_logits, pred_anchor_deltas, image_sizes) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +META_ARCH_CAFFE2_EXPORT_TYPE_MAP = { + "GeneralizedRCNN": Caffe2GeneralizedRCNN, + "PanopticFPN": Caffe2PanopticFPN, + "RetinaNet": Caffe2RetinaNet, +} diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_patch.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_patch.py new file mode 100644 index 0000000000000000000000000000000000000000..c9eee594a27cdec29ce5f2b6f7730171eda3805e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/caffe2_patch.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import contextlib +from unittest import mock +import torch + +from detectron2.modeling import poolers +from detectron2.modeling.proposal_generator import rpn +from detectron2.modeling.roi_heads import keypoint_head, mask_head +from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers + +from .c10 import ( + Caffe2Compatible, + Caffe2FastRCNNOutputsInference, + Caffe2KeypointRCNNInference, + Caffe2MaskRCNNInference, + Caffe2ROIPooler, + Caffe2RPN, +) + + +class GenericMixin(object): + pass + + +class Caffe2CompatibleConverter(object): + """ + A GenericUpdater which implements the `create_from` interface, by modifying + module object and assign it with another class replaceCls. 
+ """ + + def __init__(self, replaceCls): + self.replaceCls = replaceCls + + def create_from(self, module): + # update module's class to the new class + assert isinstance(module, torch.nn.Module) + if issubclass(self.replaceCls, GenericMixin): + # replaceCls should act as mixin, create a new class on-the-fly + new_class = type( + "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), + (self.replaceCls, module.__class__), + {}, # {"new_method": lambda self: ...}, + ) + module.__class__ = new_class + else: + # replaceCls is complete class, this allow arbitrary class swap + module.__class__ = self.replaceCls + + # initialize Caffe2Compatible + if isinstance(module, Caffe2Compatible): + module.tensor_mode = False + + return module + + +def patch(model, target, updater, *args, **kwargs): + """ + recursively (post-order) update all modules with the target type and its + subclasses, make a initialization/composition/inheritance/... via the + updater.create_from. + """ + for name, module in model.named_children(): + model._modules[name] = patch(module, target, updater, *args, **kwargs) + if isinstance(model, target): + return updater.create_from(model, *args, **kwargs) + return model + + +def patch_generalized_rcnn(model): + ccc = Caffe2CompatibleConverter + model = patch(model, rpn.RPN, ccc(Caffe2RPN)) + model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) + + return model + + +@contextlib.contextmanager +def mock_fastrcnn_outputs_inference( + tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers +): + with mock.patch.object( + box_predictor_type, + "inference", + autospec=True, + side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): + with mock.patch( + "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): + with mock.patch( + "{}.keypoint_rcnn_inference".format(patched_module), + side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +class ROIHeadsPatcher: + def __init__(self, heads, use_heatmap_max_keypoint): + self.heads = heads + self.use_heatmap_max_keypoint = use_heatmap_max_keypoint + + @contextlib.contextmanager + def mock_roi_heads(self, tensor_mode=True): + """ + Patching several inference functions inside ROIHeads and its subclasses + + Args: + tensor_mode (bool): whether the inputs/outputs are caffe2's tensor + format or not. Default to True. + """ + # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` + # are called inside the same file as BaseXxxHead due to using mock.patch. 
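# ---------------------------------------------------------------------------
# [Editor's sketch -- illustrative only, not part of the detectron2 sources in
#  this diff] Caffe2CompatibleConverter.create_from above swaps a module's
#  behaviour by reassigning its __class__, so existing parameters and buffers
#  survive while forward() changes.  A self-contained illustration with
#  hypothetical classes:
# ---------------------------------------------------------------------------
import torch
from torch import nn


class Original(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 2)

    def forward(self, x):
        return self.linear(x)


class TraceFriendly(Original):
    def forward(self, x):            # the behaviour used after the swap
        return self.linear(x).flatten()


m = Original()
m.__class__ = TraceFriendly          # same weights, new forward
assert m(torch.zeros(1, 4)).shape == (2,)
# ------------------------- end of editor's sketch ---------------------------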
+ kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ + mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ + + mock_ctx_managers = [ + mock_fastrcnn_outputs_inference( + tensor_mode=tensor_mode, + check=True, + box_predictor_type=type(self.heads.box_predictor), + ) + ] + if getattr(self.heads, "keypoint_on", False): + mock_ctx_managers += [ + mock_keypoint_rcnn_inference( + tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint + ) + ] + if getattr(self.heads, "mask_on", False): + mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] + + with contextlib.ExitStack() as stack: # python 3.3+ + for mgr in mock_ctx_managers: + stack.enter_context(mgr) + yield diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/flatten.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/flatten.py new file mode 100644 index 0000000000000000000000000000000000000000..b89c0f66897d01f1f04959ba6241e5b0fdbe56c6 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/flatten.py @@ -0,0 +1,327 @@ +import collections +from dataclasses import dataclass +from typing import Callable, List, Optional, Tuple +import torch +from torch import nn + +from detectron2.structures import Boxes, Instances +from detectron2.utils.registry import _convert_target_to_string, locate + +from .torchscript_patch import patch_builtin_len + + +@dataclass +class Schema: + """ + A Schema defines how to flatten a possibly hierarchical object into tuple of + primitive objects, so it can be used as inputs/outputs of PyTorch's tracing. + + PyTorch does not support tracing a function that produces rich output + structures (e.g. dict, Instances, Boxes). To trace such a function, we + flatten the rich object into tuple of tensors, and return this tuple of tensors + instead. Meanwhile, we also need to know how to "rebuild" the original object + from the flattened results, so we can evaluate the flattened results. + A Schema defines how to flatten an object, and while flattening it, it records + necessary schemas so that the object can be rebuilt using the flattened outputs. + + The flattened object and the schema object is returned by ``.flatten`` classmethod. + Then the original object can be rebuilt with the ``__call__`` method of schema. + + A Schema is a dataclass that can be serialized easily. + """ + + # inspired by FetchMapper in tensorflow/python/client/session.py + + @classmethod + def flatten(cls, obj): + raise NotImplementedError + + def __call__(self, values): + raise NotImplementedError + + @staticmethod + def _concat(values): + ret = () + sizes = [] + for v in values: + assert isinstance(v, tuple), "Flattened results must be a tuple" + ret = ret + v + sizes.append(len(v)) + return ret, sizes + + @staticmethod + def _split(values, sizes): + if len(sizes): + expected_len = sum(sizes) + assert ( + len(values) == expected_len + ), f"Values has length {len(values)} but expect length {expected_len}." + ret = [] + for k in range(len(sizes)): + begin, end = sum(sizes[:k]), sum(sizes[: k + 1]) + ret.append(values[begin:end]) + return ret + + +@dataclass +class ListSchema(Schema): + schemas: List[Schema] # the schemas that define how to flatten each element in the list + sizes: List[int] # the flattened length of each element + + def __call__(self, values): + values = self._split(values, self.sizes) + if len(values) != len(self.schemas): + raise ValueError( + f"Values has length {len(values)} but schemas " f"has length {len(self.schemas)}!" 
+ ) + values = [m(v) for m, v in zip(self.schemas, values)] + return list(values) + + @classmethod + def flatten(cls, obj): + res = [flatten_to_tuple(k) for k in obj] + values, sizes = cls._concat([k[0] for k in res]) + return values, cls([k[1] for k in res], sizes) + + +@dataclass +class TupleSchema(ListSchema): + def __call__(self, values): + return tuple(super().__call__(values)) + + +@dataclass +class IdentitySchema(Schema): + def __call__(self, values): + return values[0] + + @classmethod + def flatten(cls, obj): + return (obj,), cls() + + +@dataclass +class DictSchema(ListSchema): + keys: List[str] + + def __call__(self, values): + values = super().__call__(values) + return dict(zip(self.keys, values)) + + @classmethod + def flatten(cls, obj): + for k in obj.keys(): + if not isinstance(k, str): + raise KeyError("Only support flattening dictionaries if keys are str.") + keys = sorted(obj.keys()) + values = [obj[k] for k in keys] + ret, schema = ListSchema.flatten(values) + return ret, cls(schema.schemas, schema.sizes, keys) + + +@dataclass +class InstancesSchema(DictSchema): + def __call__(self, values): + image_size, fields = values[-1], values[:-1] + fields = super().__call__(fields) + return Instances(image_size, **fields) + + @classmethod + def flatten(cls, obj): + ret, schema = super().flatten(obj.get_fields()) + size = obj.image_size + if not isinstance(size, torch.Tensor): + size = torch.tensor(size) + return ret + (size,), schema + + +@dataclass +class TensorWrapSchema(Schema): + """ + For classes that are simple wrapper of tensors, e.g. + Boxes, RotatedBoxes, BitMasks + """ + + class_name: str + + def __call__(self, values): + return locate(self.class_name)(values[0]) + + @classmethod + def flatten(cls, obj): + return (obj.tensor,), cls(_convert_target_to_string(type(obj))) + + +# if more custom structures needed in the future, can allow +# passing in extra schemas for custom types +def flatten_to_tuple(obj): + """ + Flatten an object so it can be used for PyTorch tracing. + Also returns how to rebuild the original object from the flattened outputs. + + Returns: + res (tuple): the flattened results that can be used as tracing outputs + schema: an object with a ``__call__`` method such that ``schema(res) == obj``. + It is a pure dataclass that can be serialized. + """ + schemas = [ + ((str, bytes), IdentitySchema), + (list, ListSchema), + (tuple, TupleSchema), + (collections.abc.Mapping, DictSchema), + (Instances, InstancesSchema), + (Boxes, TensorWrapSchema), + ] + for klass, schema in schemas: + if isinstance(obj, klass): + F = schema + break + else: + F = IdentitySchema + + return F.flatten(obj) + + +class TracingAdapter(nn.Module): + """ + A model may take rich input/output format (e.g. dict or custom classes), + but `torch.jit.trace` requires tuple of tensors as input/output. + This adapter flattens input/output format of a model so it becomes traceable. + + It also records the necessary schema to rebuild model's inputs/outputs from flattened + inputs/outputs. 
+ + Example: + :: + outputs = model(inputs) # inputs/outputs may be rich structure + adapter = TracingAdapter(model, inputs) + + # can now trace the model, with adapter.flattened_inputs, or another + # tuple of tensors with the same length and meaning + traced = torch.jit.trace(adapter, adapter.flattened_inputs) + + # traced model can only produce flattened outputs (tuple of tensors) + flattened_outputs = traced(*adapter.flattened_inputs) + # adapter knows the schema to convert it back (new_outputs == outputs) + new_outputs = adapter.outputs_schema(flattened_outputs) + """ + + flattened_inputs: Tuple[torch.Tensor] = None + """ + Flattened version of inputs given to this class's constructor. + """ + + inputs_schema: Schema = None + """ + Schema of the inputs given to this class's constructor. + """ + + outputs_schema: Schema = None + """ + Schema of the output produced by calling the given model with inputs. + """ + + def __init__( + self, + model: nn.Module, + inputs, + inference_func: Optional[Callable] = None, + allow_non_tensor: bool = False, + ): + """ + Args: + model: an nn.Module + inputs: An input argument or a tuple of input arguments used to call model. + After flattening, it has to only consist of tensors. + inference_func: a callable that takes (model, *inputs), calls the + model with inputs, and return outputs. By default it + is ``lambda model, *inputs: model(*inputs)``. Can be override + if you need to call the model differently. + allow_non_tensor: allow inputs/outputs to contain non-tensor objects. + This option will filter out non-tensor objects to make the + model traceable, but ``inputs_schema``/``outputs_schema`` cannot be + used anymore because inputs/outputs cannot be rebuilt from pure tensors. + This is useful when you're only interested in the single trace of + execution (e.g. for flop count), but not interested in + generalizing the traced graph to new inputs. + """ + super().__init__() + if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): + model = model.module + self.model = model + if not isinstance(inputs, tuple): + inputs = (inputs,) + self.inputs = inputs + self.allow_non_tensor = allow_non_tensor + + if inference_func is None: + inference_func = lambda model, *inputs: model(*inputs) # noqa + self.inference_func = inference_func + + self.flattened_inputs, self.inputs_schema = flatten_to_tuple(inputs) + + if all(isinstance(x, torch.Tensor) for x in self.flattened_inputs): + return + if self.allow_non_tensor: + self.flattened_inputs = tuple( + [x for x in self.flattened_inputs if isinstance(x, torch.Tensor)] + ) + self.inputs_schema = None + else: + for input in self.flattened_inputs: + if not isinstance(input, torch.Tensor): + raise ValueError( + "Inputs for tracing must only contain tensors. " + f"Got a {type(input)} instead." + ) + + def forward(self, *args: torch.Tensor): + with torch.no_grad(), patch_builtin_len(): + if self.inputs_schema is not None: + inputs_orig_format = self.inputs_schema(args) + else: + if args != self.flattened_inputs: + raise ValueError( + "TracingAdapter does not contain valid inputs_schema." + " So it cannot generalize to other inputs and must be" + " traced with `.flattened_inputs`." 
+ ) + inputs_orig_format = self.inputs + + outputs = self.inference_func(self.model, *inputs_orig_format) + flattened_outputs, schema = flatten_to_tuple(outputs) + + flattened_output_tensors = tuple( + [x for x in flattened_outputs if isinstance(x, torch.Tensor)] + ) + if len(flattened_output_tensors) < len(flattened_outputs): + if self.allow_non_tensor: + flattened_outputs = flattened_output_tensors + self.outputs_schema = None + else: + raise ValueError( + "Model cannot be traced because some model outputs " + "cannot flatten to tensors." + ) + else: # schema is valid + if self.outputs_schema is None: + self.outputs_schema = schema + else: + assert self.outputs_schema == schema, ( + "Model should always return outputs with the same " + "structure so it can be traced!" + ) + return flattened_outputs + + def _create_wrapper(self, traced_model): + """ + Return a function that has an input/output interface the same as the + original model, but it calls the given traced model under the hood. + """ + + def forward(*args): + flattened_inputs, _ = flatten_to_tuple(args) + flattened_outputs = traced_model(*flattened_inputs) + return self.outputs_schema(flattened_outputs) + + return forward diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/shared.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/shared.py new file mode 100644 index 0000000000000000000000000000000000000000..857cc9711dc175835bd6cfa28f877f70063cb94f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/shared.py @@ -0,0 +1,1034 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import collections +import contextlib +import copy +import functools +import logging +import numpy as np +import os +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from unittest import mock +import caffe2.python.utils as putils +import torch +import torch.nn.functional as F +from caffe2.proto import caffe2_pb2 +from caffe2.python import core, net_drawer, workspace +from torch.nn.functional import interpolate as interp + +logger = logging.getLogger(__name__) + + +# ==== torch/utils_toffee/cast.py ======================================= + + +def to_device(t, device_str): + """ + This function is a replacement of .to(another_device) such that it allows the + casting to be traced properly by explicitly calling the underlying copy ops. + It also avoids introducing unncessary op when casting to the same device. 
+ """ + src = t.device + dst = torch.device(device_str) + + if src == dst: + return t + elif src.type == "cuda" and dst.type == "cpu": + return torch.ops._caffe2.CopyGPUToCPU(t) + elif src.type == "cpu" and dst.type == "cuda": + return torch.ops._caffe2.CopyCPUToGPU(t) + else: + raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst)) + + +# ==== torch/utils_toffee/interpolate.py ======================================= + + +# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py +def BilinearInterpolation(tensor_in, up_scale): + assert up_scale % 2 == 0, "Scale should be even" + + def upsample_filt(size): + factor = (size + 1) // 2 + if size % 2 == 1: + center = factor - 1 + else: + center = factor - 0.5 + + og = np.ogrid[:size, :size] + return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) + + kernel_size = int(up_scale) * 2 + bil_filt = upsample_filt(kernel_size) + + dim = int(tensor_in.shape[1]) + kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32) + kernel[range(dim), range(dim), :, :] = bil_filt + + tensor_out = F.conv_transpose2d( + tensor_in, + weight=to_device(torch.Tensor(kernel), tensor_in.device), + bias=None, + stride=int(up_scale), + padding=int(up_scale / 2), + ) + + return tensor_out + + +# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if +# using dynamic `scale_factor` rather than static `size`. (T43166860) +# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly. +def onnx_compatibale_interpolate( + input, size=None, scale_factor=None, mode="nearest", align_corners=None +): + # NOTE: The input dimensions are interpreted in the form: + # `mini-batch x channels x [optional depth] x [optional height] x width`. + if size is None and scale_factor is not None: + if input.dim() == 4: + if isinstance(scale_factor, (int, float)): + height_scale, width_scale = (scale_factor, scale_factor) + else: + assert isinstance(scale_factor, (tuple, list)) + assert len(scale_factor) == 2 + height_scale, width_scale = scale_factor + + assert not align_corners, "No matching C2 op for align_corners == True" + if mode == "nearest": + return torch.ops._caffe2.ResizeNearest( + input, order="NCHW", width_scale=width_scale, height_scale=height_scale + ) + elif mode == "bilinear": + logger.warning( + "Use F.conv_transpose2d for bilinear interpolate" + " because there's no such C2 op, this may cause significant" + " slowdown and the boundary pixels won't be as same as" + " using F.interpolate due to padding." 
+ ) + assert height_scale == width_scale + return BilinearInterpolation(input, up_scale=height_scale) + logger.warning("Output size is not static, it might cause ONNX conversion issue") + + return interp(input, size, scale_factor, mode, align_corners) + + +@contextlib.contextmanager +def mock_torch_nn_functional_interpolate(): + if torch.onnx.is_in_onnx_export(): + with mock.patch( + "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate + ): + yield + else: + yield + + +# ==== torch/utils_caffe2/ws_utils.py ========================================== + + +class ScopedWS(object): + def __init__(self, ws_name, is_reset, is_cleanup=False): + self.ws_name = ws_name + self.is_reset = is_reset + self.is_cleanup = is_cleanup + self.org_ws = "" + + def __enter__(self): + self.org_ws = workspace.CurrentWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.ws_name, True) + if self.is_reset: + workspace.ResetWorkspace() + + return workspace + + def __exit__(self, *args): + if self.is_cleanup: + workspace.ResetWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.org_ws) + + +def fetch_any_blob(name): + bb = None + try: + bb = workspace.FetchBlob(name) + except TypeError: + bb = workspace.FetchInt8Blob(name) + except Exception as e: + logger.error("Get blob {} error: {}".format(name, e)) + + return bb + + +# ==== torch/utils_caffe2/protobuf.py ========================================== + + +def get_pb_arg(pb, arg_name): + for x in pb.arg: + if x.name == arg_name: + return x + return None + + +def get_pb_arg_valf(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.f if arg is not None else default_val + + +def get_pb_arg_floats(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(float, arg.floats)) if arg is not None else default_val + + +def get_pb_arg_ints(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(int, arg.ints)) if arg is not None else default_val + + +def get_pb_arg_vali(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.i if arg is not None else default_val + + +def get_pb_arg_vals(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.s if arg is not None else default_val + + +def get_pb_arg_valstrings(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(arg.strings) if arg is not None else default_val + + +def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False): + arg = get_pb_arg(pb, arg_name) + if arg is None: + arg = putils.MakeArgument(arg_name, arg_value) + assert hasattr(arg, arg_attr) + pb.arg.extend([arg]) + if allow_override and getattr(arg, arg_attr) != arg_value: + logger.warning( + "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value) + ) + setattr(arg, arg_attr, arg_value) + else: + assert arg is not None + assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format( + getattr(arg, arg_attr), arg_value + ) + + +def _create_const_fill_op_from_numpy(name, tensor, device_option=None): + assert type(tensor) == np.ndarray + kTypeNameMapper = { + np.dtype("float32"): "GivenTensorFill", + np.dtype("int32"): "GivenTensorIntFill", + np.dtype("int64"): "GivenTensorInt64Fill", + np.dtype("uint8"): "GivenTensorStringFill", + } + + args_dict = {} + if tensor.dtype == np.dtype("uint8"): + args_dict.update({"values": [str(tensor.data)], "shape": [1]}) + else: + args_dict.update({"values": tensor, "shape": 
tensor.shape}) + + if device_option is not None: + args_dict["device_option"] = device_option + + return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict) + + +def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor): + assert type(int8_tensor) == workspace.Int8Tensor + kTypeNameMapper = { + np.dtype("int32"): "Int8GivenIntTensorFill", + np.dtype("uint8"): "Int8GivenTensorFill", + } + + tensor = int8_tensor.data + assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")] + values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor + + return core.CreateOperator( + kTypeNameMapper[tensor.dtype], + [], + [name], + values=values, + shape=tensor.shape, + Y_scale=int8_tensor.scale, + Y_zero_point=int8_tensor.zero_point, + ) + + +def create_const_fill_op( + name: str, + blob: Union[np.ndarray, workspace.Int8Tensor], + device_option: Optional[caffe2_pb2.DeviceOption] = None, +) -> caffe2_pb2.OperatorDef: + """ + Given a blob object, return the Caffe2 operator that creates this blob + as constant. Currently support NumPy tensor and Caffe2 Int8Tensor. + """ + + tensor_type = type(blob) + assert tensor_type in [ + np.ndarray, + workspace.Int8Tensor, + ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format( + name, type(blob) + ) + + if tensor_type == np.ndarray: + return _create_const_fill_op_from_numpy(name, blob, device_option) + elif tensor_type == workspace.Int8Tensor: + assert device_option is None + return _create_const_fill_op_from_c2_int8_tensor(name, blob) + + +def construct_init_net_from_params( + params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None +) -> caffe2_pb2.NetDef: + """ + Construct the init_net from params dictionary + """ + init_net = caffe2_pb2.NetDef() + device_options = device_options or {} + for name, blob in params.items(): + if isinstance(blob, str): + logger.warning( + ( + "Blob {} with type {} is not supported in generating init net," + " skipped.".format(name, type(blob)) + ) + ) + continue + init_net.op.extend( + [create_const_fill_op(name, blob, device_option=device_options.get(name, None))] + ) + init_net.external_output.append(name) + return init_net + + +def get_producer_map(ssa): + """ + Return dict from versioned blob to (i, j), + where i is index of producer op, j is the index of output of that op. + """ + producer_map = {} + for i in range(len(ssa)): + outputs = ssa[i][1] + for j, outp in enumerate(outputs): + producer_map[outp] = (i, j) + return producer_map + + +def get_consumer_map(ssa): + """ + Return dict from versioned blob to list of (i, j), + where i is index of consumer op, j is the index of input of that op. + """ + consumer_map = collections.defaultdict(list) + for i in range(len(ssa)): + inputs = ssa[i][0] + for j, inp in enumerate(inputs): + consumer_map[inp].append((i, j)) + return consumer_map + + +def get_params_from_init_net( + init_net: caffe2_pb2.NetDef, +) -> [Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]: + """ + Take the output blobs from init_net by running it. + Outputs: + params: dict from blob name to numpy array + device_options: dict from blob name to the device option of its creating op + """ + # NOTE: this assumes that the params is determined by producer op with the + # only exception be CopyGPUToCPU which is CUDA op but returns CPU tensor. 
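An editorial aside (a minimal sketch, not part of the patch): `get_producer_map` and `get_consumer_map` above only look at the SSA structure returned by `core.get_ssa`, a list of `(inputs, outputs)` pairs of versioned blobs per operator, so they can be exercised with plain Python tuples and no Caffe2 workspace:

```python
# Hand-written SSA for a three-op graph; versioned blobs are (name, version) tuples.
ssa = [
    ([("data", 0)], [("conv", 0)]),                # op 0: data -> conv
    ([("conv", 0)], [("relu", 0)]),                # op 1: conv -> relu
    ([("conv", 0), ("relu", 0)], [("sum", 0)]),    # op 2: conv, relu -> sum
]
assert get_producer_map(ssa)[("relu", 0)] == (1, 0)            # produced by op 1, output slot 0
assert get_consumer_map(ssa)[("conv", 0)] == [(1, 0), (2, 0)]  # read by op 1 and op 2, input slot 0
```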
+ def _get_device_option(producer_op): + if producer_op.type == "CopyGPUToCPU": + return caffe2_pb2.DeviceOption() + else: + return producer_op.device_option + + with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws: + ws.RunNetOnce(init_net) + params = {b: fetch_any_blob(b) for b in init_net.external_output} + ssa, versions = core.get_ssa(init_net) + producer_map = get_producer_map(ssa) + device_options = { + b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]]) + for b in init_net.external_output + } + return params, device_options + + +def _updater_raise(op, input_types, output_types): + raise RuntimeError( + "Failed to apply updater for op {} given input_types {} and" + " output_types {}".format(op, input_types, output_types) + ) + + +def _generic_status_identifier( + predict_net: caffe2_pb2.NetDef, + status_updater: Callable, + known_status: Dict[Tuple[str, int], Any], +) -> Dict[Tuple[str, int], Any]: + """ + Statically infer the status of each blob, the status can be such as device type + (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here + is versioned blob (Tuple[str, int]) in the format compatible with ssa. + Inputs: + predict_net: the caffe2 network + status_updater: a callable, given an op and the status of its input/output, + it returns the updated status of input/output. `None` is used for + representing unknown status. + known_status: a dict containing known status, used as initialization. + Outputs: + A dict mapping from versioned blob to its status + """ + ssa, versions = core.get_ssa(predict_net) + versioned_ext_input = [(b, 0) for b in predict_net.external_input] + versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output] + all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa]) + + allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output) + assert all(k in allowed_vbs for k in known_status) + assert all(v is not None for v in known_status.values()) + _known_status = copy.deepcopy(known_status) + + def _check_and_update(key, value): + assert value is not None + if key in _known_status: + if not _known_status[key] == value: + raise RuntimeError( + "Confilict status for {}, existing status {}, new status {}".format( + key, _known_status[key], value + ) + ) + _known_status[key] = value + + def _update_i(op, ssa_i): + versioned_inputs = ssa_i[0] + versioned_outputs = ssa_i[1] + + inputs_status = [_known_status.get(b, None) for b in versioned_inputs] + outputs_status = [_known_status.get(b, None) for b in versioned_outputs] + + new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status) + + for versioned_blob, status in zip( + versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status + ): + if status is not None: + _check_and_update(versioned_blob, status) + + for op, ssa_i in zip(predict_net.op, ssa): + _update_i(op, ssa_i) + for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)): + _update_i(op, ssa_i) + + # NOTE: This strictly checks all the blob from predict_net must be assgined + # a known status. However sometimes it's impossible (eg. having deadend op), + # we may relax this constraint if + for k in all_versioned_blobs: + if k not in _known_status: + raise NotImplementedError( + "Can not infer the status for {}. 
Currently only support the case where" + " a single forward and backward pass can identify status for all blobs.".format(k) + ) + + return _known_status + + +def infer_device_type( + predict_net: caffe2_pb2.NetDef, + known_status: Dict[Tuple[str, int], Any], + device_name_style: str = "caffe2", +) -> Dict[Tuple[str, int], str]: + """ Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob """ + + assert device_name_style in ["caffe2", "pytorch"] + _CPU_STR = "cpu" + _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda" + + def _copy_cpu_to_gpu_updater(op, input_types, output_types): + if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR: + _updater_raise(op, input_types, output_types) + return ([_CPU_STR], [_GPU_STR]) + + def _copy_gpu_to_cpu_updater(op, input_types, output_types): + if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR: + _updater_raise(op, input_types, output_types) + return ([_GPU_STR], [_CPU_STR]) + + def _other_ops_updater(op, input_types, output_types): + non_none_types = [x for x in input_types + output_types if x is not None] + if len(non_none_types) > 0: + the_type = non_none_types[0] + if not all(x == the_type for x in non_none_types): + _updater_raise(op, input_types, output_types) + else: + the_type = None + return ([the_type for _ in op.input], [the_type for _ in op.output]) + + def _device_updater(op, *args, **kwargs): + return { + "CopyCPUToGPU": _copy_cpu_to_gpu_updater, + "CopyGPUToCPU": _copy_gpu_to_cpu_updater, + }.get(op.type, _other_ops_updater)(op, *args, **kwargs) + + return _generic_status_identifier(predict_net, _device_updater, known_status) + + +# ==== torch/utils_caffe2/vis.py =============================================== + + +def _modify_blob_names(ops, blob_rename_f): + ret = [] + + def _replace_list(blob_list, replaced_list): + del blob_list[:] + blob_list.extend(replaced_list) + + for x in ops: + cur = copy.deepcopy(x) + _replace_list(cur.input, list(map(blob_rename_f, cur.input))) + _replace_list(cur.output, list(map(blob_rename_f, cur.output))) + ret.append(cur) + + return ret + + +def _rename_blob(name, blob_sizes, blob_ranges): + def _list_to_str(bsize): + ret = ", ".join([str(x) for x in bsize]) + ret = "[" + ret + "]" + return ret + + ret = name + if blob_sizes is not None and name in blob_sizes: + ret += "\n" + _list_to_str(blob_sizes[name]) + if blob_ranges is not None and name in blob_ranges: + ret += "\n" + _list_to_str(blob_ranges[name]) + + return ret + + +# graph_name could not contain word 'graph' +def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None): + blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges) + return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f) + + +def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None): + graph = None + ops = net.op + if blob_rename_func is not None: + ops = _modify_blob_names(ops, blob_rename_func) + if not op_only: + graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB") + else: + graph = net_drawer.GetPydotGraphMinimal( + ops, graph_name, rankdir="TB", minimal_dependency=True + ) + + try: + par_dir = os.path.dirname(file_name) + if not os.path.exists(par_dir): + os.makedirs(par_dir) + + format = os.path.splitext(os.path.basename(file_name))[-1] + if format == ".png": + graph.write_png(file_name) + elif format == ".pdf": + graph.write_pdf(file_name) + elif format == ".svg": + 
graph.write_svg(file_name) + else: + print("Incorrect format {}".format(format)) + except Exception as e: + print("Error when writing graph to image {}".format(e)) + + return graph + + +# ==== torch/utils_toffee/aten_to_caffe2.py ==================================== + + +def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef): + """ + For ONNX exported model, GroupNorm will be represented as ATen op, + this can be a drop in replacement from ATen to GroupNorm + """ + count = 0 + for op in predict_net.op: + if op.type == "ATen": + op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3 + if op_name and op_name.decode() == "group_norm": + op.arg.remove(get_pb_arg(op, "operator")) + + if get_pb_arg_vali(op, "cudnn_enabled", None): + op.arg.remove(get_pb_arg(op, "cudnn_enabled")) + + num_groups = get_pb_arg_vali(op, "num_groups", None) + if num_groups is not None: + op.arg.remove(get_pb_arg(op, "num_groups")) + check_set_pb_arg(op, "group", "i", num_groups) + + op.type = "GroupNorm" + count += 1 + if count > 1: + logger.info("Replaced {} ATen operator to GroupNormOp".format(count)) + + +# ==== torch/utils_toffee/alias.py ============================================= + + +def alias(x, name, is_backward=False): + if not torch.onnx.is_in_onnx_export(): + return x + assert isinstance(x, torch.Tensor) + return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward) + + +def fuse_alias_placeholder(predict_net, init_net): + """ Remove AliasWithName placeholder and rename the input/output of it """ + # First we finish all the re-naming + for i, op in enumerate(predict_net.op): + if op.type == "AliasWithName": + assert len(op.input) == 1 + assert len(op.output) == 1 + name = get_pb_arg_vals(op, "name", None).decode() + is_backward = bool(get_pb_arg_vali(op, "is_backward", 0)) + rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward) + rename_op_output(predict_net, i, 0, name) + + # Remove AliasWithName, should be very safe since it's a non-op + new_ops = [] + for op in predict_net.op: + if op.type != "AliasWithName": + new_ops.append(op) + else: + # safety check + assert op.input == op.output + assert op.input[0] == op.arg[0].s.decode() + del predict_net.op[:] + predict_net.op.extend(new_ops) + + +# ==== torch/utils_caffe2/graph_transform.py =================================== + + +class IllegalGraphTransformError(ValueError): + """ When a graph transform function call can't be executed. 
""" + + +def _rename_versioned_blob_in_proto( + proto: caffe2_pb2.NetDef, + old_name: str, + new_name: str, + version: int, + ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]], + start_versions: Dict[str, int], + end_versions: Dict[str, int], +): + """ In given proto, rename all blobs with matched version """ + # Operater list + for op, i_th_ssa in zip(proto.op, ssa): + versioned_inputs, versioned_outputs = i_th_ssa + for i in range(len(op.input)): + if versioned_inputs[i] == (old_name, version): + op.input[i] = new_name + for i in range(len(op.output)): + if versioned_outputs[i] == (old_name, version): + op.output[i] = new_name + # external_input + if start_versions.get(old_name, 0) == version: + for i in range(len(proto.external_input)): + if proto.external_input[i] == old_name: + proto.external_input[i] = new_name + # external_output + if end_versions.get(old_name, 0) == version: + for i in range(len(proto.external_output)): + if proto.external_output[i] == old_name: + proto.external_output[i] = new_name + + +def rename_op_input( + predict_net: caffe2_pb2.NetDef, + init_net: caffe2_pb2.NetDef, + op_id: int, + input_id: int, + new_name: str, + from_producer: bool = False, +): + """ + Rename the op_id-th operator in predict_net, change it's input_id-th input's + name to the new_name. It also does automatic re-route and change + external_input and init_net if necessary. + - It requires the input is only consumed by this op. + - This function modifies predict_net and init_net in-place. + - When from_producer is enable, this also updates other operators that consumes + the same input. Be cautious because may trigger unintended behavior. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + + init_net_ssa, init_net_versions = core.get_ssa(init_net) + predict_net_ssa, predict_net_versions = core.get_ssa( + predict_net, copy.deepcopy(init_net_versions) + ) + + versioned_inputs, versioned_outputs = predict_net_ssa[op_id] + old_name, version = versioned_inputs[input_id] + + if from_producer: + producer_map = get_producer_map(predict_net_ssa) + if not (old_name, version) in producer_map: + raise NotImplementedError( + "Can't find producer, the input {} is probably from" + " init_net, this is not supported yet.".format(old_name) + ) + producer = producer_map[(old_name, version)] + rename_op_output(predict_net, producer[0], producer[1], new_name) + return + + def contain_targets(op_ssa): + return (old_name, version) in op_ssa[0] + + is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa] + if sum(is_consumer) > 1: + raise IllegalGraphTransformError( + ( + "Input '{}' of operator(#{}) are consumed by other ops, please use" + + " rename_op_output on the producer instead. Offending op: \n{}" + ).format(old_name, op_id, predict_net.op[op_id]) + ) + + # update init_net + _rename_versioned_blob_in_proto( + init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions + ) + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, + old_name, + new_name, + version, + predict_net_ssa, + init_net_versions, + predict_net_versions, + ) + + +def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str): + """ + Rename the op_id-th operator in predict_net, change it's output_id-th input's + name to the new_name. It also does automatic re-route and change + external_output and if necessary. + - It allows multiple consumers of its output. 
+ - This function modifies predict_net in-place, doesn't need init_net. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + + ssa, blob_versions = core.get_ssa(predict_net) + + versioned_inputs, versioned_outputs = ssa[op_id] + old_name, version = versioned_outputs[output_id] + + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, old_name, new_name, version, ssa, {}, blob_versions + ) + + +def get_sub_graph_external_input_output( + predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int] +) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]: + """ + Return the list of external input/output of sub-graph, + each element is tuple of the name and corresponding version in predict_net. + + external input/output is defined the same way as caffe2 NetDef. + """ + ssa, versions = core.get_ssa(predict_net) + + all_inputs = [] + all_outputs = [] + for op_id in sub_graph_op_indices: + all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs] + all_outputs += list(ssa[op_id][1]) # ssa output won't repeat + + # for versioned blobs, external inputs are just those blob in all_inputs + # but not in all_outputs + ext_inputs = [inp for inp in all_inputs if inp not in all_outputs] + + # external outputs are essentially outputs of this subgraph that are used + # outside of this sub-graph (including predict_net.external_output) + all_other_inputs = sum( + (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices), + [(outp, versions[outp]) for outp in predict_net.external_output], + ) + ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)] + + return ext_inputs, ext_outputs + + +class DiGraph: + """ A DAG representation of caffe2 graph, each vertice is a versioned blob. """ + + def __init__(self): + self.vertices = set() + self.graph = collections.defaultdict(list) + + def add_edge(self, u, v): + self.graph[u].append(v) + self.vertices.add(u) + self.vertices.add(v) + + # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/ + def get_all_paths(self, s, d): + visited = {k: False for k in self.vertices} + path = [] + all_paths = [] + + def _get_all_paths_util(graph, u, d, visited, path): + visited[u] = True + path.append(u) + if u == d: + all_paths.append(copy.deepcopy(path)) + else: + for i in graph[u]: + if not visited[i]: + _get_all_paths_util(graph, i, d, visited, path) + path.pop() + visited[u] = False + + _get_all_paths_util(self.graph, s, d, visited, path) + return all_paths + + @staticmethod + def from_ssa(ssa): + graph = DiGraph() + for op_id in range(len(ssa)): + for inp in ssa[op_id][0]: + for outp in ssa[op_id][1]: + graph.add_edge(inp, outp) + return graph + + +def _get_dependency_chain(ssa, versioned_target, versioned_source): + """ + Return the index list of relevant operator to produce target blob from source blob, + if there's no dependency, return empty list. + """ + + # finding all paths between nodes can be O(N!), thus we can only search + # in the subgraph using the op starting from the first consumer of source blob + # to the producer of the target blob. 
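The path search used below relies on `DiGraph.get_all_paths`, defined above; a small illustrative check (editorial sketch, with bare strings standing in for the versioned blob tuples):

```python
g = DiGraph()
g.add_edge("a", "b")  # a -> b
g.add_edge("b", "c")  # b -> c
g.add_edge("a", "c")  # a -> c, a shortcut edge
# Every route from "a" to "c" is returned, endpoints included.
assert sorted(g.get_all_paths("a", "c")) == [["a", "b", "c"], ["a", "c"]]
```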
+ consumer_map = get_consumer_map(ssa) + producer_map = get_producer_map(ssa) + start_op = min(x[0] for x in consumer_map[versioned_source]) - 15 + end_op = ( + producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op + ) + sub_graph_ssa = ssa[start_op : end_op + 1] + if len(sub_graph_ssa) > 30: + logger.warning( + "Subgraph bebetween {} and {} is large (from op#{} to op#{}), it" + " might take non-trival time to find all paths between them.".format( + versioned_source, versioned_target, start_op, end_op + ) + ) + + dag = DiGraph.from_ssa(sub_graph_ssa) + paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends + ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths] + return sorted(set().union(*[set(ops) for ops in ops_in_paths])) + + +def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]: + """ + Idenfity the reshape sub-graph in a protobuf. + The reshape sub-graph is defined as matching the following pattern: + + (input_blob) -> Op_1 -> ... -> Op_N -> (new_shape) -─┐ + └-------------------------------------------> Reshape -> (output_blob) + + Return: + List of sub-graphs, each sub-graph is represented as a list of indices + of the relavent ops, [Op_1, Op_2, ..., Op_N, Reshape] + """ + + ssa, _ = core.get_ssa(predict_net) + + ret = [] + for i, op in enumerate(predict_net.op): + if op.type == "Reshape": + assert len(op.input) == 2 + input_ssa = ssa[i][0] + data_source = input_ssa[0] + shape_source = input_ssa[1] + op_indices = _get_dependency_chain(ssa, shape_source, data_source) + ret.append(op_indices + [i]) + return ret + + +def remove_reshape_for_fc(predict_net, params): + """ + In PyTorch nn.Linear has to take 2D tensor, this often leads to reshape + a 4D tensor to 2D by calling .view(). However this (dynamic) reshaping + doesn't work well with ONNX and Int8 tools, and cause using extra + ops (eg. ExpandDims) that might not be available on mobile. + Luckily Caffe2 supports 4D tensor for FC, so we can remove those reshape + after exporting ONNX model. + """ + from caffe2.python import core + + # find all reshape sub-graph that can be removed, which is now all Reshape + # sub-graph whose output is only consumed by FC. + # TODO: to make it safer, we may need the actually value to better determine + # if a Reshape before FC is removable. + reshape_sub_graphs = identify_reshape_sub_graph(predict_net) + sub_graphs_to_remove = [] + for reshape_sub_graph in reshape_sub_graphs: + reshape_op_id = reshape_sub_graph[-1] + assert predict_net.op[reshape_op_id].type == "Reshape" + ssa, _ = core.get_ssa(predict_net) + reshape_output = ssa[reshape_op_id][1][0] + consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]] + if all(predict_net.op[consumer].type == "FC" for consumer in consumers): + # safety check if the sub-graph is isolated, for this reshape sub-graph, + # it means it has one non-param external input and one external output. + ext_inputs, ext_outputs = get_sub_graph_external_input_output( + predict_net, reshape_sub_graph + ) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1: + sub_graphs_to_remove.append(reshape_sub_graph) + + # perform removing subgraph by: + # 1: rename the Reshape's output to its input, then the graph can be + # seen as in-place itentify, meaning whose external input/output are the same. + # 2: simply remove those ops. 
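For context, a hedged sketch of the eager-mode pattern that creates these Reshape sub-graphs in the first place: a 4D feature map is flattened with `.view()` before an `nn.Linear`, and that flatten shows up in the exported graph as a Reshape feeding an FC op:

```python
import torch
from torch import nn

feat = torch.randn(2, 256, 7, 7)          # N x C x H x W RoI features
fc = nn.Linear(256 * 7 * 7, 1024)
out = fc(feat.view(feat.size(0), -1))     # this .view() is the Reshape the pass removes
assert out.shape == (2, 1024)
```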
+ remove_op_ids = [] + params_to_remove = [] + for sub_graph in sub_graphs_to_remove: + logger.info( + "Remove Reshape sub-graph:\n{}".format( + "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph]) + ) + ) + reshape_op_id = sub_graph[-1] + new_reshap_output = predict_net.op[reshape_op_id].input[0] + rename_op_output(predict_net, reshape_op_id, 0, new_reshap_output) + ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0] + assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1 + assert ext_outputs[0][0] == non_params_ext_inputs[0][0] + assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1 + remove_op_ids.extend(sub_graph) + params_to_remove.extend(params_ext_inputs) + + predict_net = copy.deepcopy(predict_net) + new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids] + del predict_net.op[:] + predict_net.op.extend(new_ops) + for versioned_params in params_to_remove: + name = versioned_params[0] + logger.info("Remove params: {} from init_net and predict_net.external_input".format(name)) + del params[name] + predict_net.external_input.remove(name) + + return predict_net, params + + +def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef): + """ + In-place fuse extra copy ops between cpu/gpu for the following case: + a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1 + -CopyBToA> c2 -NextOp2-> d2 + The fused network will look like: + a -NextOp1-> d1 + -NextOp2-> d2 + """ + + _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"] + + def _fuse_once(predict_net): + ssa, blob_versions = core.get_ssa(predict_net) + consumer_map = get_consumer_map(ssa) + versioned_external_output = [ + (name, blob_versions[name]) for name in predict_net.external_output + ] + + for op_id, op in enumerate(predict_net.op): + if op.type in _COPY_OPS: + fw_copy_versioned_output = ssa[op_id][1][0] + consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]] + reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)] + + is_fusable = ( + len(consumer_ids) > 0 + and fw_copy_versioned_output not in versioned_external_output + and all( + predict_net.op[_op_id].type == reverse_op_type + and ssa[_op_id][1][0] not in versioned_external_output + for _op_id in consumer_ids + ) + ) + + if is_fusable: + for rv_copy_op_id in consumer_ids: + # making each NextOp uses "a" directly and removing Copy ops + rs_copy_versioned_output = ssa[rv_copy_op_id][1][0] + next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0] + predict_net.op[next_op_id].input[inp_id] = op.input[0] + # remove CopyOps + new_ops = [ + op + for i, op in enumerate(predict_net.op) + if i != op_id and i not in consumer_ids + ] + del predict_net.op[:] + predict_net.op.extend(new_ops) + return True + + return False + + # _fuse_once returns False is nothing can be fused + while _fuse_once(predict_net): + pass + + +def remove_dead_end_ops(net_def: caffe2_pb2.NetDef): + """ remove ops if its output is not used or not in external_output """ + ssa, versions = core.get_ssa(net_def) + versioned_external_output = [(name, versions[name]) for name in net_def.external_output] + consumer_map = get_consumer_map(ssa) + removed_op_ids = set() + + def _is_dead_end(versioned_blob): + return not ( + versioned_blob in versioned_external_output + or ( + len(consumer_map[versioned_blob]) > 0 + and all(x[0] not in removed_op_ids for x in 
consumer_map[versioned_blob]) + ) + ) + + for i, ssa_i in reversed(list(enumerate(ssa))): + versioned_outputs = ssa_i[1] + if all(_is_dead_end(outp) for outp in versioned_outputs): + removed_op_ids.add(i) + + # simply removing those deadend ops should have no effect to external_output + new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids] + del net_def.op[:] + net_def.op.extend(new_ops) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/torchscript.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/torchscript.py new file mode 100644 index 0000000000000000000000000000000000000000..7939ae51fff9c4f9f55f48c1e3c69b70106dbdea --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/torchscript.py @@ -0,0 +1,127 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import os +import torch + +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.file_io import PathManager + +from .torchscript_patch import freeze_training_mode, patch_instances + +__all__ = ["scripting_with_instances", "dump_torchscript_IR"] + + +def scripting_with_instances(model, fields): + """ + Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since + attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult + for scripting to support it out of the box. This function is made to support scripting + a model that uses :class:`Instances`. It does the following: + + 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, + but with all attributes been "static". + The attributes need to be statically declared in the ``fields`` argument. + 2. Register ``new_Instances``, and force scripting compiler to + use it when trying to compile ``Instances``. + + After this function, the process will be reverted. User should be able to script another model + using different fields. + + Example: + Assume that ``Instances`` in the model consist of two attributes named + ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and + :class:`Tensor` respectively during inference. You can call this function like: + :: + fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} + torchscipt_model = scripting_with_instances(model, fields) + + Note: + It only support models in evaluation mode. + + Args: + model (nn.Module): The input model to be exported by scripting. + fields (Dict[str, type]): Attribute names and corresponding type that + ``Instances`` will use in the model. Note that all attributes used in ``Instances`` + need to be added, regardless of whether they are inputs/outputs of the model. + Data type not defined in detectron2 is not supported for now. + + Returns: + torch.jit.ScriptModule: the model in torchscript format + """ + assert TORCH_VERSION >= (1, 8), "This feature is not available in PyTorch < 1.8" + assert ( + not model.training + ), "Currently we only support exporting models in evaluation mode to torchscript" + + with freeze_training_mode(model), patch_instances(fields): + scripted_model = torch.jit.script(model) + return scripted_model + + +# alias for old name +export_torchscript_with_instances = scripting_with_instances + + +def dump_torchscript_IR(model, dir): + """ + Dump IR of a TracedModule/ScriptModule at various levels. + Useful for debugging. + + Args: + model (TracedModule or ScriptModule): traced or scripted module + dir (str): output directory to dump files. 
+ """ + # TODO: support ScriptFunction as well + PathManager.mkdirs(dir) + + def _get_script_mod(mod): + if isinstance(mod, torch.jit.TracedModule): + return mod._actual_script_module + return mod + + # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code + with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f: + + def get_code(mod): + # Try a few ways to get code using private attributes. + try: + # This contains more information than just `mod.code` + return _get_script_mod(mod)._c.code + except AttributeError: + pass + try: + return mod.code + except AttributeError: + return None + + def dump_code(prefix, mod): + code = get_code(mod) + name = prefix or "root model" + if code is None: + f.write(f"Could not found code for {name} (type={mod.original_name})\n") + f.write("\n") + else: + f.write(f"\nCode for {name}, type={mod.original_name}:\n") + f.write(code) + f.write("\n") + f.write("-" * 80) + + for name, m in mod.named_children(): + dump_code(prefix + "." + name, m) + + dump_code("", model) + + # Recursively dump IR of all modules + with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f: + try: + f.write(_get_script_mod(model)._c.dump_to_str(True, False, False)) + except AttributeError: + pass + + # Dump IR of the entire graph (all submodules inlined) + with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f: + f.write(str(model.inlined_graph)) + + # Dump the model structure in pytorch style + with PathManager.open(os.path.join(dir, "model.txt"), "w") as f: + f.write(str(model)) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/torchscript_patch.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/torchscript_patch.py new file mode 100644 index 0000000000000000000000000000000000000000..618e7e0c4bd58e4fc1dc7c3d0e69c1b2ae73089e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/export/torchscript_patch.py @@ -0,0 +1,377 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import os +import sys +import tempfile +from contextlib import ExitStack, contextmanager +from copy import deepcopy +from unittest import mock +import torch +from torch import nn + +# need some explicit imports due to https://github.com/pytorch/pytorch/issues/38964 +import detectron2 # noqa F401 +from detectron2.structures import Boxes, Instances +from detectron2.utils.env import _import_file + +_counter = 0 + + +def _clear_jit_cache(): + from torch.jit._recursive import concrete_type_store + from torch.jit._state import _jit_caching_layer + + concrete_type_store.type_store.clear() # for modules + _jit_caching_layer.clear() # for free functions + + +def _add_instances_conversion_methods(newInstances): + """ + Add from_instances methods to the scripted Instances class. + """ + cls_name = newInstances.__name__ + + @torch.jit.unused + def from_instances(instances: Instances): + """ + Create scripted Instances from original Instances + """ + fields = instances.get_fields() + image_size = instances.image_size + ret = newInstances(image_size) + for name, val in fields.items(): + assert hasattr(ret, f"_{name}"), f"No attribute named {name} in {cls_name}" + setattr(ret, name, deepcopy(val)) + return ret + + newInstances.from_instances = from_instances + + +@contextmanager +def patch_instances(fields): + """ + A contextmanager, under which the Instances class in detectron2 is replaced + by a statically-typed scriptable class, defined by `fields`. 
+ See more in `scripting_with_instances`. + """ + + with tempfile.TemporaryDirectory(prefix="detectron2") as dir, tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", suffix=".py", dir=dir, delete=False + ) as f: + try: + # Objects that use Instances should not reuse previously-compiled + # results in cache, because `Instances` could be a new class each time. + _clear_jit_cache() + + cls_name, s = _gen_instance_module(fields) + f.write(s) + f.flush() + f.close() + + module = _import(f.name) + new_instances = getattr(module, cls_name) + _ = torch.jit.script(new_instances) + # let torchscript think Instances was scripted already + Instances.__torch_script_class__ = True + # let torchscript find new_instances when looking for the jit type of Instances + Instances._jit_override_qualname = torch._jit_internal._qualified_name(new_instances) + + _add_instances_conversion_methods(new_instances) + yield new_instances + finally: + try: + del Instances.__torch_script_class__ + del Instances._jit_override_qualname + except AttributeError: + pass + sys.modules.pop(module.__name__) + + +def _gen_instance_class(fields): + """ + Args: + fields (dict[name: type]) + """ + + class _FieldType: + def __init__(self, name, type_): + assert isinstance(name, str), f"Field name must be str, got {name}" + self.name = name + self.type_ = type_ + self.annotation = f"{type_.__module__}.{type_.__name__}" + + fields = [_FieldType(k, v) for k, v in fields.items()] + + def indent(level, s): + return " " * 4 * level + s + + lines = [] + + global _counter + _counter += 1 + + cls_name = "ScriptedInstances{}".format(_counter) + + field_names = tuple(x.name for x in fields) + lines.append( + f""" +class {cls_name}: + def __init__(self, image_size: Tuple[int, int]): + self.image_size = image_size + self._field_names = {field_names} +""" + ) + + for f in fields: + lines.append( + indent(2, f"self._{f.name} = torch.jit.annotate(Optional[{f.annotation}], None)") + ) + + for f in fields: + lines.append( + f""" + @property + def {f.name}(self) -> {f.annotation}: + # has to use a local for type refinement + # https://pytorch.org/docs/stable/jit_language_reference.html#optional-type-refinement + t = self._{f.name} + assert t is not None + return t + + @{f.name}.setter + def {f.name}(self, value: {f.annotation}) -> None: + self._{f.name} = value +""" + ) + + # support method `__len__` + lines.append( + """ + def __len__(self) -> int: +""" + ) + for f in fields: + lines.append( + f""" + t = self._{f.name} + if t is not None: + return len(t) +""" + ) + lines.append( + """ + raise NotImplementedError("Empty Instances does not support __len__!") +""" + ) + + # support method `has` + lines.append( + """ + def has(self, name: str) -> bool: +""" + ) + for f in fields: + lines.append( + f""" + if name == "{f.name}": + return self._{f.name} is not None +""" + ) + lines.append( + """ + return False +""" + ) + + # support method `to` + lines.append( + f""" + def to(self, device: torch.device) -> "{cls_name}": + ret = {cls_name}(self.image_size) +""" + ) + for f in fields: + if hasattr(f.type_, "to"): + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret._{f.name} = t.to(device) +""" + ) + else: + # For now, ignore fields that cannot be moved to devices. + # Maybe can support other tensor-like classes (e.g. 
__torch_function__) + pass + lines.append( + """ + return ret +""" + ) + + # support method `getitem` + lines.append( + f""" + def __getitem__(self, item) -> "{cls_name}": + ret = {cls_name}(self.image_size) +""" + ) + for f in fields: + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret._{f.name} = t[item] +""" + ) + lines.append( + """ + return ret +""" + ) + + # support method `get_fields()` + lines.append( + """ + def get_fields(self) -> Dict[str, Tensor]: + ret = {} + """ + ) + for f in fields: + if f.type_ == Boxes: + stmt = "t.tensor" + elif f.type_ == torch.Tensor: + stmt = "t" + else: + stmt = f'assert False, "unsupported type {str(f.type_)}"' + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret["{f.name}"] = {stmt} + """ + ) + lines.append( + """ + return ret""" + ) + return cls_name, os.linesep.join(lines) + + +def _gen_instance_module(fields): + # TODO: find a more automatic way to enable import of other classes + s = """ +from copy import deepcopy +import torch +from torch import Tensor +import typing +from typing import * + +import detectron2 +from detectron2.structures import Boxes, Instances + +""" + + cls_name, cls_def = _gen_instance_class(fields) + s += cls_def + return cls_name, s + + +def _import(path): + return _import_file( + "{}{}".format(sys.modules[__name__].__name__, _counter), path, make_importable=True + ) + + +@contextmanager +def patch_builtin_len(modules=()): + """ + Patch the builtin len() function of a few detectron2 modules + to use __len__ instead, because __len__ does not convert values to + integers and therefore is friendly to tracing. + + Args: + modules (list[stsr]): names of extra modules to patch len(), in + addition to those in detectron2. + """ + + def _new_len(obj): + return obj.__len__() + + with ExitStack() as stack: + MODULES = [ + "detectron2.modeling.roi_heads.fast_rcnn", + "detectron2.modeling.roi_heads.mask_head", + "detectron2.modeling.roi_heads.keypoint_head", + ] + list(modules) + ctxs = [stack.enter_context(mock.patch(mod + ".len")) for mod in MODULES] + for m in ctxs: + m.side_effect = _new_len + yield + + +def patch_nonscriptable_classes(): + """ + Apply patches on a few nonscriptable detectron2 classes. + Should not have side-effects on eager usage. + """ + # __prepare_scriptable__ can also be added to models for easier maintenance. + # But it complicates the clean model code. + + from detectron2.modeling.backbone import ResNet, FPN + + # Due to https://github.com/pytorch/pytorch/issues/36061, + # we change backbone to use ModuleList for scripting. + # (note: this changes param names in state_dict) + + def prepare_resnet(self): + ret = deepcopy(self) + ret.stages = nn.ModuleList(ret.stages) + for k in self.stage_names: + delattr(ret, k) + return ret + + ResNet.__prepare_scriptable__ = prepare_resnet + + def prepare_fpn(self): + ret = deepcopy(self) + ret.lateral_convs = nn.ModuleList(ret.lateral_convs) + ret.output_convs = nn.ModuleList(ret.output_convs) + for name, _ in self.named_children(): + if name.startswith("fpn_"): + delattr(ret, name) + return ret + + FPN.__prepare_scriptable__ = prepare_fpn + + # Annotate some attributes to be constants for the purpose of scripting, + # even though they are not constants in eager mode. 
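The constant annotations applied just below depend on `torch.jit.Final`; a brief editorial sketch with a toy module shows the effect: the scripting compiler treats the attribute as a constant, so the branch on it is resolved at compile time rather than re-checked per call:

```python
import torch
from torch import nn

class TinyHead(nn.Module):
    use_relu: torch.jit.Final[bool]  # declared constant for scripting

    def __init__(self, use_relu: bool):
        super().__init__()
        self.use_relu = use_relu

    def forward(self, x):
        if self.use_relu:            # folded away when use_relu is Final and False
            return torch.relu(x)
        return x

scripted = torch.jit.script(TinyHead(use_relu=False))
assert torch.equal(scripted(torch.tensor([-1.0])), torch.tensor([-1.0]))
```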
+ from detectron2.modeling.roi_heads import StandardROIHeads + + if hasattr(StandardROIHeads, "__annotations__"): + # copy first to avoid editing annotations of base class + StandardROIHeads.__annotations__ = deepcopy(StandardROIHeads.__annotations__) + StandardROIHeads.__annotations__["mask_on"] = torch.jit.Final[bool] + StandardROIHeads.__annotations__["keypoint_on"] = torch.jit.Final[bool] + + +# These patches are not supposed to have side-effects. +patch_nonscriptable_classes() + + +@contextmanager +def freeze_training_mode(model): + """ + A context manager that annotates the "training" attribute of every submodule + to constant, so that the training codepath in these modules can be + meta-compiled away. Upon exiting, the annotations are reverted. + """ + classes = {type(x) for x in model.modules()} + # __constants__ is the old way to annotate constants and not compatible + # with __annotations__ . + classes = {x for x in classes if not hasattr(x, "__constants__")} + for cls in classes: + cls.__annotations__["training"] = torch.jit.Final[bool] + yield + for cls in classes: + cls.__annotations__["training"] = bool diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c8bd1fb024d1cb911dda3f8a77f7ec3ad2e63798 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm +from .deform_conv import DeformConv, ModulatedDeformConv +from .mask_ops import paste_masks_in_image +from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated +from .roi_align import ROIAlign, roi_align +from .roi_align_rotated import ROIAlignRotated, roi_align_rotated +from .shape_spec import ShapeSpec +from .wrappers import ( + BatchNorm2d, + Conv2d, + ConvTranspose2d, + cat, + interpolate, + Linear, + nonzero_tuple, + cross_entropy, +) +from .blocks import CNNBlockBase, DepthwiseSeparableConv2d +from .aspp import ASPP + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/aspp.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/aspp.py new file mode 100644 index 0000000000000000000000000000000000000000..14861aa9ede4fea6a69a49f189bcab997b558148 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/aspp.py @@ -0,0 +1,144 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +from copy import deepcopy +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from .batch_norm import get_norm +from .blocks import DepthwiseSeparableConv2d +from .wrappers import Conv2d + + +class ASPP(nn.Module): + """ + Atrous Spatial Pyramid Pooling (ASPP). + """ + + def __init__( + self, + in_channels, + out_channels, + dilations, + *, + norm, + activation, + pool_kernel_size=None, + dropout: float = 0.0, + use_depthwise_separable_conv=False, + ): + """ + Args: + in_channels (int): number of input channels for ASPP. + out_channels (int): number of output channels. + dilations (list): a list of 3 dilations in ASPP. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. 
norm is + applied to all conv layers except the conv following + global average pooling. + activation (callable): activation function. + pool_kernel_size (tuple, list): the average pooling size (kh, kw) + for image pooling layer in ASPP. If set to None, it always + performs global average pooling. If not None, it must be + divisible by the shape of inputs in forward(). It is recommended + to use a fixed input feature size in training, and set this + option to match this size, so that it performs global average + pooling in training, and the size of the pooling window stays + consistent in inference. + dropout (float): apply dropout on the output of ASPP. It is used in + the official DeepLab implementation with a rate of 0.1: + https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa + use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d + for 3x3 convs in ASPP, proposed in :paper:`DeepLabV3+`. + """ + super(ASPP, self).__init__() + assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) + self.pool_kernel_size = pool_kernel_size + self.dropout = dropout + use_bias = norm == "" + self.convs = nn.ModuleList() + # conv 1x1 + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # atrous convs + for dilation in dilations: + if use_depthwise_separable_conv: + self.convs.append( + DepthwiseSeparableConv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + norm1=norm, + activation1=deepcopy(activation), + norm2=norm, + activation2=deepcopy(activation), + ) + ) + else: + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # image pooling + # We do not add BatchNorm because the spatial resolution is 1x1, + # the original TF implementation has BatchNorm. + if pool_kernel_size is None: + image_pooling = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), + ) + else: + image_pooling = nn.Sequential( + nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), + ) + weight_init.c2_xavier_fill(image_pooling[1]) + self.convs.append(image_pooling) + + self.project = Conv2d( + 5 * out_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + weight_init.c2_xavier_fill(self.project) + + def forward(self, x): + size = x.shape[-2:] + if self.pool_kernel_size is not None: + if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: + raise ValueError( + "`pool_kernel_size` must be divisible by the shape of inputs. 
" + "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) + ) + res = [] + for conv in self.convs: + res.append(conv(x)) + res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) + res = torch.cat(res, dim=1) + res = self.project(res) + res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res + return res diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/batch_norm.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..f5382834d2997aa348932430709acb8b35be31b4 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/batch_norm.py @@ -0,0 +1,231 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import torch +import torch.distributed as dist +from fvcore.nn.distributed import differentiable_all_reduce +from torch import nn +from torch.nn import functional as F + +from detectron2.utils import comm, env + +from .wrappers import BatchNorm2d + + +class FrozenBatchNorm2d(nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters are fixed. + + It contains non-trainable buffers called + "weight" and "bias", "running_mean", "running_var", + initialized to perform identity transformation. + + The pre-trained backbone models from Caffe2 only contain "weight" and "bias", + which are computed from the original four parameters of BN. + The affine transform `x * weight + bias` will perform the equivalent + computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. + When loading a backbone model from Caffe2, "running_mean" and "running_var" + will be left unchanged as identity transformation. + + Other pre-trained backbone models may contain all 4 parameters. + + The forward is implemented by `F.batch_norm(..., training=False)`. + """ + + _version = 3 + + def __init__(self, num_features, eps=1e-5): + super().__init__() + self.num_features = num_features + self.eps = eps + self.register_buffer("weight", torch.ones(num_features)) + self.register_buffer("bias", torch.zeros(num_features)) + self.register_buffer("running_mean", torch.zeros(num_features)) + self.register_buffer("running_var", torch.ones(num_features) - eps) + + def forward(self, x): + if x.requires_grad: + # When gradients are needed, F.batch_norm will use extra memory + # because its backward op computes gradients for weight/bias as well. + scale = self.weight * (self.running_var + self.eps).rsqrt() + bias = self.bias - self.running_mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + out_dtype = x.dtype # may be half + return x * scale.to(out_dtype) + bias.to(out_dtype) + else: + # When gradients are not needed, F.batch_norm is a single fused op + # and provide more optimization opportunities. 
+ return F.batch_norm( + x, + self.running_mean, + self.running_var, + self.weight, + self.bias, + training=False, + eps=self.eps, + ) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get("version", None) + + if version is None or version < 2: + # No running_mean/var in early versions + # This will silent the warnings + if prefix + "running_mean" not in state_dict: + state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean) + if prefix + "running_var" not in state_dict: + state_dict[prefix + "running_var"] = torch.ones_like(self.running_var) + + # NOTE: if a checkpoint is trained with BatchNorm and loaded (together with + # version number) to FrozenBatchNorm, running_var will be wrong. One solution + # is to remove the version number from the checkpoint. + if version is not None and version < 3: + logger = logging.getLogger(__name__) + logger.info("FrozenBatchNorm {} is upgraded to version 3.".format(prefix.rstrip("."))) + # In version < 3, running_var are used without +eps. + state_dict[prefix + "running_var"] -= self.eps + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) + + def __repr__(self): + return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps) + + @classmethod + def convert_frozen_batchnorm(cls, module): + """ + Convert all BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. + + Args: + module (torch.nn.Module): + + Returns: + If module is BatchNorm/SyncBatchNorm, returns a new module. + Otherwise, in-place convert module and return it. + + Similar to convert_sync_batchnorm in + https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py + """ + bn_module = nn.modules.batchnorm + bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm) + res = module + if isinstance(module, bn_module): + res = cls(module.num_features) + if module.affine: + res.weight.data = module.weight.data.clone().detach() + res.bias.data = module.bias.data.clone().detach() + res.running_mean.data = module.running_mean.data + res.running_var.data = module.running_var.data + res.eps = module.eps + else: + for name, child in module.named_children(): + new_child = cls.convert_frozen_batchnorm(child) + if new_child is not child: + res.add_module(name, new_child) + return res + + +def get_norm(norm, out_channels): + """ + Args: + norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; + or a callable that takes a channel number and returns + the normalization layer as a nn.Module. + + Returns: + nn.Module or None: the normalization layer + """ + if norm is None: + return None + if isinstance(norm, str): + if len(norm) == 0: + return None + norm = { + "BN": BatchNorm2d, + # Fixed in https://github.com/pytorch/pytorch/pull/36382 + "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, + "FrozenBN": FrozenBatchNorm2d, + "GN": lambda channels: nn.GroupNorm(32, channels), + # for debugging: + "nnSyncBN": nn.SyncBatchNorm, + "naiveSyncBN": NaiveSyncBatchNorm, + }[norm] + return norm(out_channels) + + +class NaiveSyncBatchNorm(BatchNorm2d): + """ + In PyTorch<=1.5, ``nn.SyncBatchNorm`` has incorrect gradient + when the batch size on each worker is different. + (e.g., when scale augmentation is used, or when it is applied to mask head). + + This is a slower but correct alternative to `nn.SyncBatchNorm`. 
+ + Note: + There isn't a single definition of Sync BatchNorm. + + When ``stats_mode==""``, this module computes overall statistics by using + statistics of each worker with equal weight. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (N, H, W). This mode does not support inputs with zero batch size. + + When ``stats_mode=="N"``, this module computes overall statistics by weighting + the statistics of each worker by their ``N``. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (H, W). It is slower than ``stats_mode==""``. + + Even though the result of this module may not be the true statistics of all samples, + it may still be reasonable because it might be preferrable to assign equal weights + to all workers, regardless of their (H, W) dimension, instead of putting larger weight + on larger images. From preliminary experiments, little difference is found between such + a simplified implementation and an accurate computation of overall mean & variance. + """ + + def __init__(self, *args, stats_mode="", **kwargs): + super().__init__(*args, **kwargs) + assert stats_mode in ["", "N"] + self._stats_mode = stats_mode + + def forward(self, input): + if comm.get_world_size() == 1 or not self.training: + return super().forward(input) + + B, C = input.shape[0], input.shape[1] + + mean = torch.mean(input, dim=[0, 2, 3]) + meansqr = torch.mean(input * input, dim=[0, 2, 3]) + + if self._stats_mode == "": + assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.' + vec = torch.cat([mean, meansqr], dim=0) + vec = differentiable_all_reduce(vec) * (1.0 / dist.get_world_size()) + mean, meansqr = torch.split(vec, C) + momentum = self.momentum + else: + if B == 0: + vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype) + vec = vec + input.sum() # make sure there is gradient w.r.t input + else: + vec = torch.cat( + [mean, meansqr, torch.ones([1], device=mean.device, dtype=mean.dtype)], dim=0 + ) + vec = differentiable_all_reduce(vec * B) + + total_batch = vec[-1].detach() + momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0 + total_batch = torch.max(total_batch, torch.ones_like(total_batch)) # avoid div-by-zero + mean, meansqr, _ = torch.split(vec / total_batch, C) + + var = meansqr - mean * mean + invstd = torch.rsqrt(var + self.eps) + scale = self.weight * invstd + bias = self.bias - mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + + self.running_mean += momentum * (mean.detach() - self.running_mean) + self.running_var += momentum * (var.detach() - self.running_var) + return input * scale + bias diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/blocks.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..1995a4bf7339e8deb7eaaffda4f819dda55e7ac7 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/blocks.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import fvcore.nn.weight_init as weight_init +from torch import nn + +from .batch_norm import FrozenBatchNorm2d, get_norm +from .wrappers import Conv2d + + +""" +CNN building blocks. +""" + + +class CNNBlockBase(nn.Module): + """ + A CNN block is assumed to have input channels, output channels and a stride. 
+ The input and output of `forward()` method must be NCHW tensors. + The method can perform arbitrary computation but must match the given + channels and stride specification. + + Attribute: + in_channels (int): + out_channels (int): + stride (int): + """ + + def __init__(self, in_channels, out_channels, stride): + """ + The `__init__` method of any subclass should also contain these arguments. + + Args: + in_channels (int): + out_channels (int): + stride (int): + """ + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + + def freeze(self): + """ + Make this block not trainable. + This method sets all parameters to `requires_grad=False`, + and convert all BatchNorm layers to FrozenBatchNorm + + Returns: + the block itself + """ + for p in self.parameters(): + p.requires_grad = False + FrozenBatchNorm2d.convert_frozen_batchnorm(self) + return self + + +class DepthwiseSeparableConv2d(nn.Module): + """ + A kxk depthwise convolution + a 1x1 convolution. + + In :paper:`xception`, norm & activation are applied on the second conv. + :paper:`mobilenet` uses norm & activation on both convs. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + padding=1, + dilation=1, + *, + norm1=None, + activation1=None, + norm2=None, + activation2=None, + ): + """ + Args: + norm1, norm2 (str or callable): normalization for the two conv layers. + activation1, activation2 (callable(Tensor) -> Tensor): activation + function for the two conv layers. + """ + super().__init__() + self.depthwise = Conv2d( + in_channels, + in_channels, + kernel_size=kernel_size, + padding=padding, + dilation=dilation, + groups=in_channels, + bias=not norm1, + norm=get_norm(norm1, in_channels), + activation=activation1, + ) + self.pointwise = Conv2d( + in_channels, + out_channels, + kernel_size=1, + bias=not norm2, + norm=get_norm(norm2, out_channels), + activation=activation2, + ) + + # default initialization + weight_init.c2_msra_fill(self.depthwise) + weight_init.c2_msra_fill(self.pointwise) + + def forward(self, x): + return self.pointwise(self.depthwise(x)) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/deform_conv.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..eca070f59645af4c9ccd003d99678f19538f355d --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/deform_conv.py @@ -0,0 +1,501 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
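+# Deformable convolution (DCN v1) and modulated deformable convolution (DCN v2) layers.
+# The forward/backward kernels are provided by the compiled detectron2._C extension;
+# DeformConv additionally falls back to torchvision's deform_conv2d for its CPU forward pass.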
+import math +from functools import lru_cache +import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair +from torchvision.ops import deform_conv2d + +from detectron2 import _C + +from .wrappers import _NewEmptyTensorOp + + +class _DeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64, + ): + if input is not None and input.dim() != 4: + raise ValueError( + "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()) + ) + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride) + ) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + if deformable_groups != 1: + raise NotImplementedError( + "Deformable Conv with deformable_groups != 1 is not supported on CPUs!" + ) + return deform_conv2d( + input, offset, weight, stride=stride, padding=padding, dilation=dilation + ) + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + _C.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + cur_im2col_step, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + _C.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + cur_im2col_step, + ) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + _C.deform_conv_backward_filter( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + 1, + cur_im2col_step, + ) + + return grad_input, grad_offset, grad_weight, None, None, None, None, None, None + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 
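+ # `kernel` above is the effective (dilated) kernel extent; the output size computed
+ # below follows the standard convolution formula (in + 2*pad - kernel) // stride + 1.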
+ stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + "convolution input is too small (output would be {})".format( + "x".join(map(str, output_size)) + ) + ) + return output_size + + @staticmethod + @lru_cache(maxsize=128) + def _cal_im2col_step(input_size, default_size): + """ + Calculate proper im2col step size, which should be divisible by input_size and not larger + than prefer_size. Meanwhile the step size should be as large as possible to be more + efficient. So we choose the largest one among all divisors of input_size which are smaller + than prefer_size. + :param input_size: input batch size . + :param default_size: default preferred im2col step size. + :return: the largest proper step size. + """ + if input_size <= default_size: + return input_size + best_step = 1 + for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)): + if input_size % step == 0: + if input_size // step <= default_size: + return input_size // step + best_step = step + + return best_step + + +class _ModulatedDeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + ): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + if ( + weight.requires_grad + or mask.requires_grad + or offset.requires_grad + or input.requires_grad + ): + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + _C.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + _C.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + if not ctx.with_bias: + grad_bias = None + + return ( + grad_input, + grad_offset, + grad_mask, + grad_weight, + grad_bias, + None, + None, + None, + None, + None, + ) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = ( + height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1) + ) // ctx.stride 
+ 1 + width_out = ( + width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1) + ) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = _DeformConv.apply +modulated_deform_conv = _ModulatedDeformConv.apply + + +class DeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False, + norm=None, + activation=None, + ): + """ + Deformable convolution from :paper:`deformconv`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, "in_channels {} cannot be divisible by groups {}".format( + in_channels, groups + ) + assert ( + out_channels % groups == 0 + ), "out_channels {} cannot be divisible by groups {}".format(out_channels, groups) + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size) + ) + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + + def forward(self, x, offset): + if x.numel() == 0: + # When input is empty, we want to return a empty tensor with "correct" shape, + # So that the following operations will not panic + # if they check for the shape of the tensor. + # This computes the height and width of the output tensor + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = deform_conv( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=False" + return tmpstr + + +class ModulatedDeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True, + norm=None, + activation=None, + ): + """ + Modulated deformable convolution from :paper:`deformconv2`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. 
+ norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) + ) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + if self.bias is not None: + nn.init.constant_(self.bias, 0) + + def forward(self, x, offset, mask): + if x.numel() == 0: + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = modulated_deform_conv( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=" + str(self.with_bias) + return tmpstr diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/mask_ops.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/mask_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c698a03c4d3faf30c08da97169f010b64c0d1058 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/mask_ops.py @@ -0,0 +1,260 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Tuple +import torch +from PIL import Image +from torch.nn import functional as F + +from detectron2.structures import Boxes + +__all__ = ["paste_masks_in_image"] + + +BYTES_PER_FLOAT = 4 +# TODO: This memory limit may be too much or too little. It would be better to +# determine it based on available resources. +GPU_MEM_LIMIT = 1024 ** 3 # 1 GB memory limit + + +def _do_paste_mask(masks, boxes, img_h: int, img_w: int, skip_empty: bool = True): + """ + Args: + masks: N, 1, H, W + boxes: N, 4 + img_h, img_w (int): + skip_empty (bool): only paste masks within the region that + tightly bound all boxes, and returns the results this region only. + An important optimization for CPU. + + Returns: + if skip_empty == False, a mask of shape (N, img_h, img_w) + if skip_empty == True, a mask of shape (N, h', w'), and the slice + object for the corresponding region. + """ + # On GPU, paste all masks together (up to chunk size) + # by using the entire image to sample the masks + # Compared to pasting them one by one, + # this has more operations but is faster on COCO-scale dataset. 
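+ # The boxes are converted below into a normalized [-1, 1] sampling grid so that all
+ # masks in the chunk can be pasted with a single F.grid_sample call.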
+ device = masks.device + + if skip_empty and not torch.jit.is_scripting(): + x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to( + dtype=torch.int32 + ) + x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) + y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) + else: + x0_int, y0_int = 0, 0 + x1_int, y1_int = img_w, img_h + x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 + + N = masks.shape[0] + + img_y = torch.arange(y0_int, y1_int, device=device, dtype=torch.float32) + 0.5 + img_x = torch.arange(x0_int, x1_int, device=device, dtype=torch.float32) + 0.5 + img_y = (img_y - y0) / (y1 - y0) * 2 - 1 + img_x = (img_x - x0) / (x1 - x0) * 2 - 1 + # img_x, img_y have shapes (N, w), (N, h) + + gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) + gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) + grid = torch.stack([gx, gy], dim=3) + + if not torch.jit.is_scripting(): + if not masks.dtype.is_floating_point: + masks = masks.float() + img_masks = F.grid_sample(masks, grid.to(masks.dtype), align_corners=False) + + if skip_empty and not torch.jit.is_scripting(): + return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) + else: + return img_masks[:, 0], () + + +def paste_masks_in_image( + masks: torch.Tensor, boxes: Boxes, image_shape: Tuple[int, int], threshold: float = 0.5 +): + """ + Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image. + The location, height, and width for pasting each mask is determined by their + corresponding bounding boxes in boxes. + + Note: + This is a complicated but more accurate implementation. In actual deployment, it is + often enough to use a faster but less accurate implementation. + See :func:`paste_mask_in_image_old` in this file for an alternative implementation. + + Args: + masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of + detected object instances in the image and Hmask, Wmask are the mask width and mask + height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1]. + boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4). + boxes[i] and masks[i] correspond to the same object instance. + image_shape (tuple): height, width + threshold (float): A threshold in [0, 1] for converting the (soft) masks to + binary masks. + + Returns: + img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the + number of detected object instances and Himage, Wimage are the image width + and height. img_masks[i] is a binary mask for object instance i. + """ + + assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported" + N = len(masks) + if N == 0: + return masks.new_empty((0,) + image_shape, dtype=torch.uint8) + if not isinstance(boxes, torch.Tensor): + boxes = boxes.tensor + device = boxes.device + assert len(boxes) == N, boxes.shape + + img_h, img_w = image_shape + + # The actual implementation split the input into chunks, + # and paste them chunk by chunk. + if device.type == "cpu" or torch.jit.is_scripting(): + # CPU is most efficient when they are pasted one by one with skip_empty=True + # so that it performs minimal number of operations. 
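+ # One chunk per instance on CPU: each mask is then pasted only into the tight region
+ # around its own box (skip_empty=True).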
+ num_chunks = N + else: + # GPU benefits from parallelism for larger chunks, but may have memory issue + # int(img_h) because shape may be tensors in tracing + num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) + assert ( + num_chunks <= N + ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it" + chunks = torch.chunk(torch.arange(N, device=device), num_chunks) + + img_masks = torch.zeros( + N, img_h, img_w, device=device, dtype=torch.bool if threshold >= 0 else torch.uint8 + ) + for inds in chunks: + masks_chunk, spatial_inds = _do_paste_mask( + masks[inds, None, :, :], boxes[inds], img_h, img_w, skip_empty=device.type == "cpu" + ) + + if threshold >= 0: + masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) + else: + # for visualization and debugging + masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) + + if torch.jit.is_scripting(): # Scripting does not use the optimized codepath + img_masks[inds] = masks_chunk + else: + img_masks[(inds,) + spatial_inds] = masks_chunk + return img_masks + + +# The below are the original paste function (from Detectron1) which has +# larger quantization error. +# It is faster on CPU, while the aligned one is faster on GPU thanks to grid_sample. + + +def paste_mask_in_image_old(mask, box, img_h, img_w, threshold): + """ + Paste a single mask in an image. + This is a per-box implementation of :func:`paste_masks_in_image`. + This function has larger quantization error due to incorrect pixel + modeling and is not used any more. + + Args: + mask (Tensor): A tensor of shape (Hmask, Wmask) storing the mask of a single + object instance. Values are in [0, 1]. + box (Tensor): A tensor of shape (4, ) storing the x0, y0, x1, y1 box corners + of the object instance. + img_h, img_w (int): Image height and width. + threshold (float): Mask binarization threshold in [0, 1]. + + Returns: + im_mask (Tensor): + The resized and binarized object mask pasted into the original + image plane (a tensor of shape (img_h, img_w)). + """ + # Conversion from continuous box coordinates to discrete pixel coordinates + # via truncation (cast to int32). This determines which pixels to paste the + # mask onto. + box = box.to(dtype=torch.int32) # Continuous to discrete coordinate conversion + # An example (1D) box with continuous coordinates (x0=0.7, x1=4.3) will map to + # a discrete coordinates (x0=0, x1=4). Note that box is mapped to 5 = x1 - x0 + 1 + # pixels (not x1 - x0 pixels). 
+ samples_w = box[2] - box[0] + 1 # Number of pixel samples, *not* geometric width + samples_h = box[3] - box[1] + 1 # Number of pixel samples, *not* geometric height + + # Resample the mask from it's original grid to the new samples_w x samples_h grid + mask = Image.fromarray(mask.cpu().numpy()) + mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) + mask = np.array(mask, copy=False) + + if threshold >= 0: + mask = np.array(mask > threshold, dtype=np.uint8) + mask = torch.from_numpy(mask) + else: + # for visualization and debugging, we also + # allow it to return an unmodified mask + mask = torch.from_numpy(mask * 255).to(torch.uint8) + + im_mask = torch.zeros((img_h, img_w), dtype=torch.uint8) + x_0 = max(box[0], 0) + x_1 = min(box[2] + 1, img_w) + y_0 = max(box[1], 0) + y_1 = min(box[3] + 1, img_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0]) + ] + return im_mask + + +# Our pixel modeling requires extrapolation for any continuous +# coordinate < 0.5 or > length - 0.5. When sampling pixels on the masks, +# we would like this extrapolation to be an interpolation between boundary values and zero, +# instead of using absolute zero or boundary values. +# Therefore `paste_mask_in_image_old` is often used with zero padding around the masks like this: +# masks, scale = pad_masks(masks[:, 0, :, :], 1) +# boxes = scale_boxes(boxes.tensor, scale) + + +def pad_masks(masks, padding): + """ + Args: + masks (tensor): A tensor of shape (B, M, M) representing B masks. + padding (int): Number of cells to pad on all sides. + + Returns: + The padded masks and the scale factor of the padding size / original size. + """ + B = masks.shape[0] + M = masks.shape[-1] + pad2 = 2 * padding + scale = float(M + pad2) / M + padded_masks = masks.new_zeros((B, M + pad2, M + pad2)) + padded_masks[:, padding:-padding, padding:-padding] = masks + return padded_masks, scale + + +def scale_boxes(boxes, scale): + """ + Args: + boxes (tensor): A tensor of shape (B, 4) representing B boxes with 4 + coords representing the corners x0, y0, x1, y1, + scale (float): The box scaling factor. + + Returns: + Scaled boxes. + """ + w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 + h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 + x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5 + y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5 + + w_half *= scale + h_half *= scale + + scaled_boxes = torch.zeros_like(boxes) + scaled_boxes[:, 0] = x_c - w_half + scaled_boxes[:, 2] = x_c + w_half + scaled_boxes[:, 1] = y_c - h_half + scaled_boxes[:, 3] = y_c + h_half + return scaled_boxes diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/nms.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..ac14d459259b19a1a145adff2817a0ca0441b7eb --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/nms.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
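+# NMS helpers for axis-aligned boxes (batched_nms) and rotated boxes
+# (nms_rotated, batched_nms_rotated).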
+ +from typing import List +import torch +from torchvision.ops import boxes as box_ops +from torchvision.ops import nms # BC-compat + +from detectron2.utils.env import TORCH_VERSION + +if TORCH_VERSION < (1, 7): + from detectron2 import _C + + nms_rotated_func = _C.nms_rotated +else: + nms_rotated_func = torch.ops.detectron2.nms_rotated + + +def batched_nms( + boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float +): + """ + Same as torchvision.ops.boxes.batched_nms, but safer. + """ + assert boxes.shape[-1] == 4 + # TODO may need better strategy. + # Investigate after having a fully-cuda NMS op. + if len(boxes) < 40000: + # fp16 does not have enough range for batched NMS + return box_ops.batched_nms(boxes.float(), scores, idxs, iou_threshold) + + result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()): + mask = (idxs == id).nonzero().view(-1) + keep = nms(boxes[mask], scores[mask], iou_threshold) + result_mask[mask[keep]] = True + keep = result_mask.nonzero().view(-1) + keep = keep[scores[keep].argsort(descending=True)] + return keep + + +# Note: this function (nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future +def nms_rotated(boxes, scores, iou_threshold): + """ + Performs non-maximum suppression (NMS) on the rotated boxes according + to their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as + RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they + can be representing completely different objects in certain tasks, e.g., OCR. + + As for the question of whether rotated-NMS should treat them as faraway boxes + even though their IOU is 1, it depends on the application and/or ground truth annotation. + + As an extreme example, consider a single character v and the square box around it. + + If the angle is 0 degree, the object (text) would be read as 'v'; + + If the angle is 90 degrees, the object (text) would become '>'; + + If the angle is 180 degrees, the object (text) would become '^'; + + If the angle is 270/-90 degrees, the object (text) would become '<' + + All of these cases have IoU of 1 to each other, and rotated NMS that only + uses IoU as criterion would only keep one of them with the highest score - + which, practically, still makes sense in most cases because typically + only one of theses orientations is the correct one. Also, it does not matter + as much if the box is only used to classify the object (instead of transcribing + them with a sequential OCR recognition model) later. + + On the other hand, when we use IoU to filter proposals that are close to the + ground truth during training, we should definitely take the angle into account if + we know the ground truth is labeled with the strictly correct orientation (as in, + upside-down words are annotated with -180 degrees even though they can be covered + with a 0/90/-90 degree box, etc.) + + The way the original dataset is annotated also matters. 
For example, if the dataset + is a 4-point polygon dataset that does not enforce ordering of vertices/orientation, + we can estimate a minimum rotated bounding box to this polygon, but there's no way + we can tell the correct angle with 100% confidence (as shown above, there could be 4 different + rotated boxes, with angles differed by 90 degrees to each other, covering the exactly + same region). In that case we have to just use IoU to determine the box + proximity (as many detection benchmarks (even for text) do) unless there're other + assumptions we can make (like width is always larger than height, or the object is not + rotated by more than 90 degrees CCW/CW, etc.) + + In summary, not considering angles in rotated NMS seems to be a good option for now, + but we should be aware of its implications. + + Args: + boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. They are expected to be in + (x_center, y_center, width, height, angle_degrees) format. + scores (Tensor[N]): Scores for each one of the rotated boxes + iou_threshold (float): Discards all overlapping rotated boxes with IoU < iou_threshold + + Returns: + keep (Tensor): int64 tensor with the indices of the elements that have been kept + by Rotated NMS, sorted in decreasing order of scores + """ + return nms_rotated_func(boxes, scores, iou_threshold) + + +# Note: this function (batched_nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future +def batched_nms_rotated(boxes, scores, idxs, iou_threshold): + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 5]): + boxes where NMS will be performed. They + are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format + scores (Tensor[N]): + scores for each one of the boxes + idxs (Tensor[N]): + indices of the categories for each one of the boxes. + iou_threshold (float): + discards all overlapping boxes + with IoU < iou_threshold + + Returns: + Tensor: + int64 tensor with the indices of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + assert boxes.shape[-1] == 5 + + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + boxes = boxes.float() # fp16 does not have enough range for batched NMS + # Strategy: in order to perform NMS independently per class, + # we add an offset to all the boxes. The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + + # Note that batched_nms in torchvision/ops/boxes.py only uses max_coordinate, + # which won't handle negative coordinates correctly. + # Here by using min_coordinate we can make sure the negative coordinates are + # correctly handled. 
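+ # Conservative estimate of each box's spatial extent: center coordinate +/- half of the larger side.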
+ max_coordinate = ( + torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).max() + min_coordinate = ( + torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).min() + offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1) + boxes_for_nms = boxes.clone() # avoid modifying the original values in boxes + boxes_for_nms[:, :2] += offsets[:, None] + keep = nms_rotated(boxes_for_nms, scores, iou_threshold) + return keep diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/roi_align.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..bcbf5f4c7025c905603f95dce7bb5c42d5379987 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/roi_align.py @@ -0,0 +1,72 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from torch import nn +from torchvision.ops import roi_align + + +# NOTE: torchvision's RoIAlign has a different default aligned=False +class ROIAlign(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + aligned (bool): if False, use the legacy implementation in + Detectron. If True, align the results more perfectly. + + Note: + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). But the original + roi_align (aligned=False) does not subtract the 0.5 when computing neighboring + pixel indices and therefore it uses pixels with a slightly incorrect alignment + (relative to our pixel model) when performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; see + detectron2/tests/test_roi_align.py for verification. + + The difference does not make a difference to the model's performance if + ROIAlign is used together with conv layers. + """ + super().__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.aligned = aligned + + from torchvision import __version__ + + version = tuple(int(x) for x in __version__.split(".")[:2]) + # https://github.com/pytorch/vision/pull/2438 + assert version >= (0, 7), "Require torchvision >= 0.7" + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 
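+ The box coordinates are multiplied by ``spatial_scale`` inside ``roi_align`` before sampling.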
+ """ + assert rois.dim() == 2 and rois.size(1) == 5 + return roi_align( + input, + rois.to(dtype=input.dtype), + self.output_size, + self.spatial_scale, + self.sampling_ratio, + self.aligned, + ) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ", aligned=" + str(self.aligned) + tmpstr += ")" + return tmpstr diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/roi_align_rotated.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/roi_align_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..e3775e08fc9b9172f73c8ec7025a51ef2edd0a1d --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/roi_align_rotated.py @@ -0,0 +1,93 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from detectron2 import _C + + +class _ROIAlignRotated(Function): + @staticmethod + def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): + ctx.save_for_backward(roi) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + ctx.input_shape = input.size() + output = _C.roi_align_rotated_forward( + input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + (rois,) = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + sampling_ratio = ctx.sampling_ratio + bs, ch, h, w = ctx.input_shape + grad_input = _C.roi_align_rotated_backward( + grad_output, + rois, + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + sampling_ratio, + ) + return grad_input, None, None, None, None, None + + +roi_align_rotated = _ROIAlignRotated.apply + + +class ROIAlignRotated(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + + Note: + ROIAlignRotated supports continuous coordinate by default: + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). + """ + super(ROIAlignRotated, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx6 boxes. First column is the index into N. + The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). 
+ """ + assert rois.dim() == 2 and rois.size(1) == 6 + orig_dtype = input.dtype + if orig_dtype == torch.float16: + input = input.float() + rois = rois.float() + return roi_align_rotated( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio + ).to(dtype=orig_dtype) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ")" + return tmpstr diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/rotated_boxes.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..0004f765ef355ad47d92d26d3012be382e1b3eca --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/rotated_boxes.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import absolute_import, division, print_function, unicode_literals + +from detectron2 import _C + + +def pairwise_iou_rotated(boxes1, boxes2): + """ + Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. + + Arguments: + boxes1 (Tensor[N, 5]) + boxes2 (Tensor[M, 5]) + + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + return _C.box_iou_rotated(boxes1, boxes2) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/shape_spec.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/shape_spec.py new file mode 100644 index 0000000000000000000000000000000000000000..fe7e8e261c1ab1bb1636bd7a245068d64e632306 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/shape_spec.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +from collections import namedtuple + + +class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): + """ + A simple structure that contains basic shape specification about a tensor. + It is often used as the auxiliary inputs/outputs of models, + to complement the lack of shape inference ability among pytorch modules. + + Attributes: + channels: + height: + width: + stride: + """ + + def __new__(cls, channels=None, height=None, width=None, stride=None): + return super().__new__(cls, channels, height, width, stride) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/wrappers.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..5bb4e7c1a1334c5501a6c492ddfa836dadf0beab --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/layers/wrappers.py @@ -0,0 +1,110 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Wrappers around on some nn functions, mainly to support empty tensors. + +Ideally, add support directly in PyTorch to empty tensors in those functions. 
+ +These can be removed once https://github.com/pytorch/pytorch/issues/12013 +is implemented +""" + +from typing import List +import torch +from torch.nn import functional as F + + +def cat(tensors: List[torch.Tensor], dim: int = 0): + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def cross_entropy(input, target, *, reduction="mean", **kwargs): + """ + Same as `torch.nn.functional.cross_entropy`, but returns 0 (instead of nan) + for empty inputs. + """ + if target.numel() == 0 and reduction == "mean": + return input.sum() * 0.0 # connect the gradient + return F.cross_entropy(input, target, **kwargs) + + +class _NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return _NewEmptyTensorOp.apply(grad, shape), None + + +class Conv2d(torch.nn.Conv2d): + """ + A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. + """ + + def __init__(self, *args, **kwargs): + """ + Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: + + Args: + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + + It assumes that norm layer is used before activation. + """ + norm = kwargs.pop("norm", None) + activation = kwargs.pop("activation", None) + super().__init__(*args, **kwargs) + + self.norm = norm + self.activation = activation + + def forward(self, x): + # torchscript does not support SyncBatchNorm yet + # https://github.com/pytorch/pytorch/issues/40507 + # and we skip these codes in torchscript since: + # 1. currently we only support torchscript in evaluation mode + # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or + # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. + if not torch.jit.is_scripting(): + if x.numel() == 0 and self.training: + # https://github.com/pytorch/pytorch/issues/12013 + assert not isinstance( + self.norm, torch.nn.SyncBatchNorm + ), "SyncBatchNorm does not support empty inputs!" + + x = F.conv2d( + x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + +ConvTranspose2d = torch.nn.ConvTranspose2d +BatchNorm2d = torch.nn.BatchNorm2d +interpolate = F.interpolate +Linear = torch.nn.Linear + + +def nonzero_tuple(x): + """ + A 'as_tuple=True' version of torch.nonzero to support torchscript. + because of https://github.com/pytorch/pytorch/issues/38718 + """ + if torch.jit.is_scripting(): + if x.dim() == 0: + return x.unsqueeze(0).nonzero().unbind(1) + return x.nonzero().unbind(1) + else: + return x.nonzero(as_tuple=True) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fcae6e18502bab72d76e220b7144b8c262d80e1f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+""" +Model Zoo API for Detectron2: a collection of functions to create common model architectures +listed in `MODEL_ZOO.md `_, +and optionally load their pre-trained weights. +""" + +from .model_zoo import get, get_config_file, get_checkpoint_url, get_config + +__all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-C4.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-C4.yaml @@ -0,0 +1,18 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RPN: + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "Res5ROIHeads" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-DilatedC5.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-DilatedC5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-DilatedC5.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RESNETS: + OUT_FEATURES: ["res5"] + RES5_DILATION: 2 + RPN: + IN_FEATURES: ["res5"] + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["res5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-FPN.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e020f2e7b2f26765be317f907126a1556621abf --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RCNN-FPN.yaml @@ -0,0 +1,42 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
+ POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RetinaNet.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RetinaNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Base-RetinaNet.yaml @@ -0,0 +1,25 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + BACKBONE: + NAME: "build_retinanet_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] + FPN: + IN_FEATURES: ["res3", "res4", "res5"] + RETINANET: + IOU_THRESHOLDS: [0.4, 0.5] + IOU_LABELS: [0, -1, 1] + SMOOTH_L1_LOSS_BETA: 0.0 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..773ac10e87c626760d00d831bf664ce9ff073c49 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + LOAD_PROPOSALS: True + RESNETS: + DEPTH: 50 + PROPOSAL_GENERATOR: + NAME: "PrecomputedProposals" +DATASETS: + TRAIN: ("coco_2017_train",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) + TEST: ("coco_2017_val",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +DATALOADER: + # proposals are part of the dataset_dicts, and take a lot of RAM + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db142cd671c1841b4f64cf130bee7f7954ecdd28 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bceb6b343618d8cd9a6c414ff9eb86ab31cc230a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57a098f53ee8c54ecfa354cc96efefd890dc1b72 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f96130105c3ba6ab393e0932870903875f5cb732 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc51bce390a85ee3529ffdcebde05748e1646be0 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fe96f57febdac5790ea4cec168fa4b97ac4807a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33fadeb87d1ef67ab2b55926b9a652ab4ac4a27d --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3262019a1211b910d3b371569199ed1afaacf6a4 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41395182bf5c9dd8ab1241c4414068817298d554 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c9b5ab77157baa581d90d9847c045c19ed6ffa3 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: False + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4abb1b9a547957aa6afc0b29129e00f89cf98d59 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a24ce3a9a108a8792e18c8aabfb7b712f0d3725 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b5412d4a7aef1d6c3f7c1e34f94007de639b833 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e04821156b0376ba5215d5ce5b7010a36b43e6a1 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_C4_1x.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + RPN: + PRE_NMS_TOPK_TEST: 12000 + POST_NMS_TOPK_TEST: 2000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc9c95203b1c3c9cd9bb9876bb8d9a5dd9b31d9a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + RPN: + POST_NMS_TOPK_TEST: 2000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a94cc45a0f2aaa8c92e14871c553b736545e327 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67b70cf4be8c19f5dc735b6f55a8690698f34b69 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1935a302d2d0fa7f69553b3fd50b5a7082c6c0d1 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9aeb4eac38026dbb867e799f9fd3a8d8eb3af80 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38ed867d897dfec839cbcf11a2e2dc8abb92f07c --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b13eefab2a049c48d94d5051c82ceb6dbde40579 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git 
a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d401016358f967f6619d88b1c9bd5673a1cdeba8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d50fb866ca7811a87b42555c7213f88e00bf6df1 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bec680ee17a474fefe527b7b79d26266e75c09f0 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + RPN: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 2.0 + ROI_BOX_HEAD: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 10.0 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be7d06b8e0f032ee7fcaabd7c122158518489fd2 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d14c63f74383bfc308750f51d51344398b02a239 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: 
True + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e03944a42d2e497da5ceca17c8fda797dac3f82 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss + RPN: + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. + POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9309535c57a1aa7d23297aac80a9bd78a6c79fcc --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bf85cf745b53b3e7ab28fe94b7f4f9e7fe6e335 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a07f243f650a497b9372501e3face75194cf0941 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4bfa20a98c0a65c6bd60e93b07e8f4b7d92a867 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f00d54b760c2b9271c75643e0a1ab1ffc0d9543a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + MASK_ON: True + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: False diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e01f6fb31e9b00b1857b7de3b5074184d1f4a21 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6afa2c1cc92495309ed1553a17359fe5d7d6566e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b956b3f673e78649184fe2c50e2700b3f1f14794 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: 
(210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + # For better, more stable performance initialize from COCO + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" + MASK_ON: True + ROI_HEADS: + NUM_CLASSES: 8 +# This is similar to the setting used in Mask R-CNN paper, Appendix A +# But there are some differences, e.g., we did not initialize the output +# layer using the corresponding classes from COCO +INPUT: + MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 1024 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 +DATASETS: + TRAIN: ("cityscapes_fine_instance_seg_train",) + TEST: ("cityscapes_fine_instance_seg_val",) +SOLVER: + BASE_LR: 0.01 + STEPS: (18000,) + MAX_ITER: 24000 + IMS_PER_BATCH: 8 +TEST: + EVAL_PERIOD: 8000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. 
+ ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + RPN: + SMOOTH_L1_BETA: 0.1111 + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. + POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + ROI_MASK_HEAD: + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64b4caa4ef2b284782367ea702e1ae6653472630 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git 
a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,23 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,26 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml @@ -0,0 +1,36 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 152 + DEFORM_ON_PER_STAGE: [False, True, True, True] + ROI_HEADS: + NAME: "CascadeROIHeads" + ROI_BOX_HEAD: + NAME: 
"FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NUM_CONV: 8 + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + IMS_PER_BATCH: 128 + STEPS: (35000, 45000) + MAX_ITER: 50000 + BASE_LR: 0.16 +INPUT: + MIN_SIZE_TRAIN: (640, 864) + MIN_SIZE_TRAIN_SAMPLING: "range" + MAX_SIZE_TRAIN: 1440 + CROP: + ENABLED: True +TEST: + EVAL_PERIOD: 2500 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + CLS_AGNOSTIC_MASK: True diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "GN" + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + ROI_MASK_HEAD: + NORM: "GN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml @@ -0,0 +1,24 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "SyncBN" + STRIDE_IN_1X1: True + FPN: + NORM: "SyncBN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "SyncBN" + ROI_MASK_HEAD: + NORM: "SyncBN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 +TEST: + PRECISE_BN: + ENABLED: True diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml @@ -0,0 +1,26 @@ +# A large PanopticFPN for demo purposes. +# Use GN on backbone to support semantic seg. +# Use Cascade + Deform Conv to improve localization. +_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" + RESNETS: + DEPTH: 101 + NORM: "GN" + DEFORM_ON_PER_STAGE: [False, True, True, True] + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (105000, 125000) + MAX_ITER: 135000 + IMS_PER_BATCH: 32 + BASE_LR: 0.04 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,13 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + # Train from random initialization. + WEIGHTS: "" + # It makes sense to divide by STD when training from scratch + # But it seems to make no difference on the results and C2's models didn't do this. + # So we keep things consistent with C2. + # PIXEL_STD: [57.375, 57.12, 58.395] + MASK_ON: True + BACKBONE: + FREEZE_AT: 0 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
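Note: the YAML files in the hunks above are build-tree copies of detectron2's stock model-zoo configs; each file overrides only a handful of keys and inherits the rest through its `_BASE_` file, which detectron2's config loader resolves recursively. The sketch below is a minimal, illustrative example of how such a config is normally consumed (it assumes a working detectron2 install and uses the canonical model-zoo config name rather than the `build/` copy added in this diff):

```python
# Minimal sketch: load a model-zoo config and inspect the values that the
# _BASE_ chain plus the per-file overrides resolve to. Assumes detectron2 is
# installed; the config name below is the upstream model-zoo path, of which
# the files in this diff are build-directory copies.
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()  # start from detectron2's built-in defaults
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
)
# Optionally point WEIGHTS at the released COCO checkpoint instead of the
# ImageNet-pretrained backbone listed in the YAML.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
)

print(cfg.MODEL.RESNETS.DEPTH)                 # 50, from the YAML override
print(cfg.SOLVER.STEPS, cfg.SOLVER.MAX_ITER)   # (210000, 250000) 270000 -> the "3x" schedule
print(cfg.MODEL.MASK_ON)                       # True for the InstanceSegmentation configs
```

The same `get_cfg()` + `merge_from_file` pattern applies to the `quick_schedules/*` configs added further below, which appear to serve as detectron2's fast regression tests: their `TEST.EXPECTED_RESULTS` entries list the metric values and tolerances the test harness checks against.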
diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
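The two scratch-training ("9x") configs above encode their schedule in inline arithmetic comments: IMS_PER_BATCH 64 is 4x detectron2's default of 16, so iteration counts shrink by 4x and the base LR scales up linearly. A short sanity check of those numbers, purely illustrative and not part of the repository (the 0.02 reference LR comes from the shared Base-RCNN-FPN defaults, which are not shown in this hunk; the 60k/20k step offsets match the 3x schedule of 270000 iterations with drops at 210000 and 250000):

```python
# Quick check of the schedule arithmetic in the scratch 9x configs above.
# This is only a verification of the YAML comments, not pipeline code.
base_iters_1x = 90_000        # standard 1x schedule at IMS_PER_BATCH = 16
batch_scale = 64 // 16        # these configs use 4x the standard batch size

max_iter = base_iters_1x * 9 // batch_scale          # "90k * 9 / 4"
steps = (max_iter - 60_000 // batch_scale,           # LR drop 60k (/4) iters before the end
         max_iter - 20_000 // batch_scale)           # LR drop 20k (/4) iters before the end
base_lr = 0.02 * batch_scale                         # linear LR scaling with batch size

assert max_iter == 202_500
assert steps == (187_500, 197_500)
assert abs(base_lr - 0.08) < 1e-9
```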
diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/semantic_R_50_FPN_1x.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/semantic_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/Misc/semantic_R_50_FPN_1x.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea2a6baaebd1a186db18f2904430ffb25901898e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e554cab18a358a27b630c1ab0c2359666b0e1514 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", 
"AP", 43.87, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,15 @@ +_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) + TEST: ("coco_2017_val_100",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" +DATASETS: + TEST: ("keypoints_coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], 
["keypoints", "AP", 67.36, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dd209f693bd0bfdd46a2c9e7e750dede3abc141 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,16 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 +DATASETS: + TRAIN: ("keypoints_coco_2017_val_100",) + TEST: ("keypoints_coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml @@ -0,0 +1,30 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False + LOSS_WEIGHT: 4.0 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,28 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + 
STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "value" + CLIP_VALUE: 1.0 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + 
MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + IMS_PER_BATCH: 8 # base uses 16 + WARMUP_FACTOR: 0.33333 + WARMUP_ITERS: 100 + STEPS: (11000, 11600) + MAX_ITER: 12000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] + AUG: + ENABLED: True + MIN_SIZES: (700, 800) # to save some time diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml @@ -0,0 +1,6 @@ +_BASE_: 
"./mask_rcnn_R_50_FPN_training_acc_test.yaml" +MODEL: + ROI_BOX_HEAD: + TRAIN_ON_PRED_BOXES: True +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + WARMUP_FACTOR: 0.3333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" +DATASETS: + TEST: ("coco_2017_val_100_panoptic_separated",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_separated",) + TEST: ("coco_2017_val_100_panoptic_separated",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 1 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3bbf30196cb35434340d4c343cab0c96283cd4f --- /dev/null +++ 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 500 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809 --- /dev/null +++ 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + STEPS: (30,) + MAX_ITER: 40 + BASE_LR: 0.005 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 300 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/model_zoo.py 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/model_zoo.py new file mode 100644 index 0000000000000000000000000000000000000000..8fa9a0cbdee685d8c9d70ea8a4e4a63fa3c3c7a7 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/model_zoo/model_zoo.py @@ -0,0 +1,172 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import os +from typing import Optional +import pkg_resources +import torch + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.modeling import build_model + + +class _ModelZooUrls(object): + """ + Mapping from names to officially released Detectron2 pre-trained models. + """ + + S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + # format: {config_path.yaml} -> model_id/model_final_{commit}.pkl + CONFIG_PATH_TO_URL_SUFFIX = { + # COCO Detection with Faster R-CNN + "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml": "137257644/model_final_721ade.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml": "137847829/model_final_51d356.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml": "137257794/model_final_b275ba.pkl", + "COCO-Detection/faster_rcnn_R_50_C4_3x.yaml": "137849393/model_final_f97cb7.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml": "137849425/model_final_68d202.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml": "137849458/model_final_280758.pkl", + "COCO-Detection/faster_rcnn_R_101_C4_3x.yaml": "138204752/model_final_298dad.pkl", + "COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml": "138204841/model_final_3e0943.pkl", + "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml": "137851257/model_final_f6e8b1.pkl", + "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml": "139173657/model_final_68b088.pkl", + # COCO Detection with RetinaNet + "COCO-Detection/retinanet_R_50_FPN_1x.yaml": "190397773/model_final_bfca0b.pkl", + "COCO-Detection/retinanet_R_50_FPN_3x.yaml": "190397829/model_final_5bd44e.pkl", + "COCO-Detection/retinanet_R_101_FPN_3x.yaml": "190397697/model_final_971ab9.pkl", + # COCO Detection with RPN and Fast R-CNN + "COCO-Detection/rpn_R_50_C4_1x.yaml": "137258005/model_final_450694.pkl", + "COCO-Detection/rpn_R_50_FPN_1x.yaml": "137258492/model_final_02ce48.pkl", + "COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml": "137635226/model_final_e5f7ce.pkl", + # COCO Instance Segmentation Baselines with Mask R-CNN + "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml": "137259246/model_final_9243eb.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml": "137260150/model_final_4f86c3.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "137260431/model_final_a54504.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml": "137849525/model_final_4ce675.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml": "137849551/model_final_84107b.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml": "137849600/model_final_f10217.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml": "138363239/model_final_a2914c.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml": "138363294/model_final_0464b7.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml": "138205316/model_final_a3ec72.pkl", + "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml": "139653917/model_final_2d9806.pkl", # noqa + # COCO Person Keypoint Detection Baselines with Keypoint R-CNN + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml": "137261548/model_final_04e291.pkl", + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml": 
"137849621/model_final_a6e10b.pkl", + "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml": "138363331/model_final_997cc7.pkl", + "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml": "139686956/model_final_5ad38f.pkl", + # COCO Panoptic Segmentation Baselines with Panoptic FPN + "COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml": "139514544/model_final_dbfeb4.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml": "139514569/model_final_c10459.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml": "139514519/model_final_cafdb1.pkl", + # LVIS Instance Segmentation Baselines with Mask R-CNN + "LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "144219072/model_final_571f7c.pkl", # noqa + "LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml": "144219035/model_final_824ab5.pkl", # noqa + "LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml": "144219108/model_final_5e3439.pkl", # noqa + # Cityscapes & Pascal VOC Baselines + "Cityscapes/mask_rcnn_R_50_FPN.yaml": "142423278/model_final_af9cf5.pkl", + "PascalVOC-Detection/faster_rcnn_R_50_C4.yaml": "142202221/model_final_b1acc2.pkl", + # Other Settings + "Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml": "138602867/model_final_65c703.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml": "144998336/model_final_821d0b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml": "138602847/model_final_e9d89b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml": "144998488/model_final_480dd8.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml": "169527823/model_final_3b3c51.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_gn.yaml": "138602888/model_final_dc5d9e.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml": "138602908/model_final_01ca85.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml": "183808979/model_final_da7b4c.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml": "184226666/model_final_5ce33e.pkl", + "Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml": "139797668/model_final_be35db.pkl", + "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml": "18131413/model_0039999_e76410.pkl", # noqa + # D1 Comparisons + "Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml": "137781054/model_final_7ab50c.pkl", # noqa + "Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml": "137781281/model_final_62ca52.pkl", # noqa + "Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml": "137781195/model_final_cce136.pkl", + } + + +def get_checkpoint_url(config_path): + """ + Returns the URL to the model trained using the given config + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: a URL to the model + """ + name = config_path.replace(".yaml", "") + if config_path in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX: + suffix = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[config_path] + return _ModelZooUrls.S3_PREFIX + name + "/" + suffix + raise RuntimeError("{} not available in Model Zoo!".format(name)) + + +def get_config_file(config_path): + """ + Returns path to a builtin config file. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: the real path to the config file. 
+ """ + cfg_file = pkg_resources.resource_filename( + "detectron2.model_zoo", os.path.join("configs", config_path) + ) + if not os.path.exists(cfg_file): + raise RuntimeError("{} not available in Model Zoo!".format(config_path)) + return cfg_file + + +def get_config(config_path, trained: bool = False): + """ + Returns a config object for a model in model zoo. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + trained (bool): If True, will set ``MODEL.WEIGHTS`` to trained model zoo weights. + If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used + instead; this will typically (though not always) initialize a subset of weights using + an ImageNet pre-trained model, while randomly initializing the other weights. + + Returns: + CfgNode: a config object + """ + cfg_file = get_config_file(config_path) + cfg = get_cfg() + cfg.merge_from_file(cfg_file) + if trained: + cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path) + return cfg + + +def get(config_path, trained: bool = False, device: Optional[str] = None): + """ + Get a model specified by relative path under Detectron2's official ``configs/`` directory. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + trained (bool): see :func:`get_config`. + device (str or None): overwrite the device in config, if given. + + Returns: + nn.Module: a detectron2 model. Will be in training mode. + + Example: + :: + from detectron2 import model_zoo + model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True) + """ + cfg = get_config(config_path, trained) + if device is not None: + cfg.MODEL.DEVICE = device + elif not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + return model diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0655f96b4618d716f62290ce65e7ae82335ea61f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/__init__.py @@ -0,0 +1,58 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from detectron2.layers import ShapeSpec + +from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY +from .backbone import ( + BACKBONE_REGISTRY, + FPN, + Backbone, + ResNet, + ResNetBlockBase, + build_backbone, + build_resnet_backbone, + make_stage, +) +from .meta_arch import ( + META_ARCH_REGISTRY, + SEM_SEG_HEADS_REGISTRY, + GeneralizedRCNN, + PanopticFPN, + ProposalNetwork, + RetinaNet, + SemanticSegmentor, + build_model, + build_sem_seg_head, +) +from .postprocessing import detector_postprocess +from .proposal_generator import ( + PROPOSAL_GENERATOR_REGISTRY, + build_proposal_generator, + RPN_HEAD_REGISTRY, + build_rpn_head, +) +from .roi_heads import ( + ROI_BOX_HEAD_REGISTRY, + ROI_HEADS_REGISTRY, + ROI_KEYPOINT_HEAD_REGISTRY, + ROI_MASK_HEAD_REGISTRY, + ROIHeads, + StandardROIHeads, + BaseMaskRCNNHead, + BaseKeypointRCNNHead, + FastRCNNOutputLayers, + build_box_head, + build_keypoint_head, + build_mask_head, + build_roi_heads, +) +from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA +from .mmdet_wrapper import MMDetBackbone, MMDetDetector + +_EXCLUDE = {"ShapeSpec"} +__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/anchor_generator.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..a8343e0ad573414c9123a75e2f51ec4487ed93d0 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/anchor_generator.py @@ -0,0 +1,381 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import collections +import math +from typing import List +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec +from detectron2.structures import Boxes, RotatedBoxes +from detectron2.utils.registry import Registry + +ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR") +ANCHOR_GENERATOR_REGISTRY.__doc__ = """ +Registry for modules that creates object detection anchors for feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +class BufferList(nn.Module): + """ + Similar to nn.ParameterList, but for buffers + """ + + def __init__(self, buffers): + super().__init__() + for i, buffer in enumerate(buffers): + self.register_buffer(str(i), buffer) + + def __len__(self): + return len(self._buffers) + + def __iter__(self): + return iter(self._buffers.values()) + + +def _create_grid_offsets(size: List[int], stride: int, offset: float, device: torch.device): + grid_height, grid_width = size + shifts_x = torch.arange( + offset * stride, grid_width * stride, step=stride, dtype=torch.float32, device=device + ) + shifts_y = torch.arange( + offset * stride, grid_height * stride, step=stride, dtype=torch.float32, device=device + ) + + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + return shift_x, shift_y + + +def _broadcast_params(params, num_features, name): + """ + If one size (or aspect ratio) is specified and there are multiple feature + maps, we "broadcast" anchors of that single size (or aspect ratio) + over all feature maps. 
+ + If params is list[float], or list[list[float]] with len(params) == 1, repeat + it num_features time. + + Returns: + list[list[float]]: param for each feature + """ + assert isinstance( + params, collections.abc.Sequence + ), f"{name} in anchor generator has to be a list! Got {params}." + assert len(params), f"{name} in anchor generator cannot be empty!" + if not isinstance(params[0], collections.abc.Sequence): # params is list[float] + return [params] * num_features + if len(params) == 1: + return list(params) * num_features + assert len(params) == num_features, ( + f"Got {name} of length {len(params)} in anchor generator, " + f"but the number of input features is {num_features}!" + ) + return params + + +@ANCHOR_GENERATOR_REGISTRY.register() +class DefaultAnchorGenerator(nn.Module): + """ + Compute anchors in the standard ways described in + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks". + """ + + box_dim: torch.jit.Final[int] = 4 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If ``sizes`` is list[float], ``sizes`` is used for all feature maps. + Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. + """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + } + + def _calculate_anchors(self, sizes, aspect_ratios): + cell_anchors = [ + self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios) + ] + return BufferList(cell_anchors) + + @property + @torch.jit.unused + def num_cell_anchors(self): + """ + Alias of `num_anchors`. + """ + return self.num_anchors + + @property + @torch.jit.unused + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios and 5 sizes, the number of anchors is 15. + (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config) + + In standard RPN models, `num_anchors` on every feature map is the same. 
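`_broadcast_params` above is what allows `ANCHOR_GENERATOR.SIZES` and `ASPECT_RATIOS` to be written either once or per feature map. A small self-contained illustration of the rule (values are made up; the helper is re-stated in trimmed form rather than imported):

```python
# Trimmed restatement of the _broadcast_params rule, for illustration only.
def broadcast(params, num_features):
    if not isinstance(params[0], (list, tuple)):   # list[float]
        return [list(params)] * num_features
    if len(params) == 1:                           # list[list[float]] of length 1
        return list(params) * num_features
    assert len(params) == num_features             # otherwise: one entry per feature map
    return params

num_features = 5                                   # e.g. p2..p6 from an FPN

# A single flat list is shared across all feature maps ...
assert broadcast([0.5, 1.0, 2.0], num_features) == [[0.5, 1.0, 2.0]] * 5
# ... while one sub-list per level is used as-is.
assert broadcast([[32], [64], [128], [256], [512]], num_features) == \
    [[32], [64], [128], [256], [512]]
# With 1 size and 3 aspect ratios per location, num_anchors per location is 3.
```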
+ """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes: List[List[int]]): + """ + Returns: + list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4 + """ + anchors = [] + # buffers() not supported by torchscript. use named_buffers() instead + buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()] + for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) + + return anchors + + def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes and aspect_ratios centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes + in XYXY format. + """ + + # This is different from the anchor generator defined in the original Faster R-CNN + # code or Detectron. They yield the same AP, however the old version defines cell + # anchors in a less natural way with a shift relative to the feature grid and + # quantization that results in slightly different sizes for different aspect ratios. + # See also https://github.com/facebookresearch/Detectron/issues/227 + + anchors = [] + for size in sizes: + area = size ** 2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... + # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 + anchors.append([x0, y0, x1, y1]) + return torch.tensor(anchors) + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[Boxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [Boxes(x) for x in anchors_over_all_feature_maps] + + +@ANCHOR_GENERATOR_REGISTRY.register() +class RotatedAnchorGenerator(nn.Module): + """ + Compute rotated anchors used by Rotated RPN (RRPN), described in + "Arbitrary-Oriented Scene Text Detection via Rotation Proposals". + """ + + box_dim: int = 5 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If sizes is list[list[float]], sizes[i] is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If sizes is list[float], the sizes are used for all feature maps. 
+ Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + angles (list[list[float]] or list[float]): list of angles (in degrees CCW) + to use for anchors. Same "broadcast" rule for `sizes` applies. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. + """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + angles = _broadcast_params(angles, self.num_features, "angles") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES, + } + + def _calculate_anchors(self, sizes, aspect_ratios, angles): + cell_anchors = [ + self.generate_cell_anchors(size, aspect_ratio, angle).float() + for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles) + ] + return BufferList(cell_anchors) + + @property + def num_cell_anchors(self): + """ + Alias of `num_anchors`. + """ + return self.num_anchors + + @property + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios, 2 sizes and 5 angles, the number of anchors is 30. + (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS + and ANCHOR_GENERATOR.ANGLES in config) + + In standard RRPN models, `num_anchors` on every feature map is the same. + """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes): + anchors = [] + for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) + zeros = torch.zeros_like(shift_x) + shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1) + + anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5)) + + return anchors + + def generate_cell_anchors( + self, + sizes=(32, 64, 128, 256, 512), + aspect_ratios=(0.5, 1, 2), + angles=(-90, -60, -30, 0, 30, 60, 90), + ): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes, aspect_ratios, angles centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + angles (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) + storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format. 
+ """ + anchors = [] + for size in sizes: + area = size ** 2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... + # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + anchors.extend([0, 0, w, h, a] for a in angles) + + return torch.tensor(anchors) + + def forward(self, features): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [RotatedBoxes(x) for x in anchors_over_all_feature_maps] + + +def build_anchor_generator(cfg, input_shape): + """ + Built an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`. + """ + anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME + return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a34cbc5976db8e0b7d62b9d70a83a34c187c388a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip + +from .backbone import Backbone +from .fpn import FPN +from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage +from .swin_transformer import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] +# TODO can expose more resnet blocks after careful consideration diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/backbone.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..369fb884930c5dd82f94024c45303dafaab14d66 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/backbone.py @@ -0,0 +1,53 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from abc import ABCMeta, abstractmethod +import torch.nn as nn + +from detectron2.layers import ShapeSpec + +__all__ = ["Backbone"] + + +class Backbone(nn.Module, metaclass=ABCMeta): + """ + Abstract base class for network backbones. + """ + + def __init__(self): + """ + The `__init__` method of any subclass can specify its own set of arguments. + """ + super().__init__() + + @abstractmethod + def forward(self): + """ + Subclasses must override this method, but adhere to the same return type. + + Returns: + dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor + """ + pass + + @property + def size_divisibility(self) -> int: + """ + Some backbones require the input height and width to be divisible by a + specific integer. This is typically true for encoder / decoder type networks + with lateral connection (e.g., FPN) for which feature maps need to match + dimension in the "bottom up" and "top down" paths. 
Set to 0 if no specific + input size divisibility is required. + """ + return 0 + + def output_shape(self): + """ + Returns: + dict[str->ShapeSpec] + """ + # this is a backward-compatible default + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/build.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/build.py new file mode 100644 index 0000000000000000000000000000000000000000..af02141172bebe9a2a27a88c81673c2710b4d73f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/build.py @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from detectron2.layers import ShapeSpec +from detectron2.utils.registry import Registry + +from .backbone import Backbone + +BACKBONE_REGISTRY = Registry("BACKBONE") +BACKBONE_REGISTRY.__doc__ = """ +Registry for backbones, which extract feature maps from images + +The registered object must be a callable that accepts two arguments: + +1. A :class:`detectron2.config.CfgNode` +2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. + +Registered object must return instance of :class:`Backbone`. +""" + + +def build_backbone(cfg, input_shape=None): + """ + Build a backbone from `cfg.MODEL.BACKBONE.NAME`. + + Returns: + an instance of :class:`Backbone` + """ + if input_shape is None: + input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) + + backbone_name = cfg.MODEL.BACKBONE.NAME + backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) + assert isinstance(backbone, Backbone) + return backbone diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/fpn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..d0bdfc9da8cb7afc9ef421baef2c173a63ff1743 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/fpn.py @@ -0,0 +1,255 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import Conv2d, ShapeSpec, get_norm + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY +from .resnet import build_resnet_backbone + +__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] + + +class FPN(Backbone): + """ + This module implements :paper:`FPN`. + It creates pyramid features built on top of some input feature maps. + """ + + _fuse_type: torch.jit.Final[str] + + def __init__( + self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum" + ): + """ + Args: + bottom_up (Backbone): module representing the bottom up subnetwork. + Must be a subclass of :class:`Backbone`. The multi-scale feature + maps generated by the bottom up network, and listed in `in_features`, + are used to generate FPN levels. + in_features (list[str]): names of the input feature maps coming + from the backbone to which FPN is attached. For example, if the + backbone produces ["res2", "res3", "res4"], any *contiguous* sublist + of these may be used; order must be from high to low resolution. + out_channels (int): number of channels in the output feature maps. + norm (str): the normalization to use. 
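`build.py` above defines the contract used throughout this build tree: any callable registered in `BACKBONE_REGISTRY` that accepts `(cfg, input_shape)` and returns a `Backbone` can be selected through `cfg.MODEL.BACKBONE.NAME`. A minimal sketch of that contract with a hypothetical toy backbone (the class name and layer sizes are invented for illustration):

```python
# Hypothetical toy backbone, only to illustrate the BACKBONE_REGISTRY contract.
import torch.nn as nn

from detectron2.config import get_cfg
from detectron2.layers import ShapeSpec
from detectron2.modeling import BACKBONE_REGISTRY, Backbone, build_backbone


@BACKBONE_REGISTRY.register()
class ToyBackbone(Backbone):
    def __init__(self, cfg, input_shape: ShapeSpec):
        super().__init__()
        self.conv = nn.Conv2d(input_shape.channels, 64, kernel_size=3, stride=16, padding=1)
        # These three attributes feed the default Backbone.output_shape() above.
        self._out_features = ["toy"]
        self._out_feature_channels = {"toy": 64}
        self._out_feature_strides = {"toy": 16}

    def forward(self, x):
        return {"toy": self.conv(x)}    # dict[str -> Tensor], as required


cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "ToyBackbone"
backbone = build_backbone(cfg)          # input_shape defaults to len(PIXEL_MEAN) channels
print(backbone.output_shape())          # {'toy': ShapeSpec(channels=64, ..., stride=16)}
```

The stock `build_resnet_fpn_backbone` further down is registered the same way, just as a builder function returning an FPN.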
+ top_block (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + FPN output, and the result will extend the result list. The top_block + further downsamples the feature map. It must have an attribute + "num_levels", meaning the number of extra FPN levels added by + this block, and "in_feature", which is a string representing + its input feature (e.g., p5). + fuse_type (str): types for fusing the top down features and the lateral + ones. It can be "sum" (default), which sums up element-wise; or "avg", + which takes the element-wise mean of the two. + """ + super(FPN, self).__init__() + assert isinstance(bottom_up, Backbone) + assert in_features, in_features + + # Feature map strides and channels from the bottom up network (e.g. ResNet) + input_shapes = bottom_up.output_shape() + strides = [input_shapes[f].stride for f in in_features] + in_channels_per_feature = [input_shapes[f].channels for f in in_features] + + _assert_strides_are_log2_contiguous(strides) + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(in_channels_per_feature): + lateral_norm = get_norm(norm, out_channels) + output_norm = get_norm(norm, out_channels) + + lateral_conv = Conv2d( + in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + output_conv = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + ) + weight_init.c2_xavier_fill(lateral_conv) + weight_init.c2_xavier_fill(output_conv) + stage = int(math.log2(strides[idx])) + self.add_module("fpn_lateral{}".format(stage), lateral_conv) + self.add_module("fpn_output{}".format(stage), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. + self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + self.top_block = top_block + self.in_features = tuple(in_features) + self.bottom_up = bottom_up + # Return feature names are "p", like ["p2", "p3", ..., "p6"] + self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} + # top block output feature maps. + if self.top_block is not None: + for s in range(stage, stage + self.top_block.num_levels): + self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) + + self._out_features = list(self._out_feature_strides.keys()) + self._out_feature_channels = {k: out_channels for k in self._out_features} + self._size_divisibility = strides[-1] + assert fuse_type in {"avg", "sum"} + self._fuse_type = fuse_type + + @property + def size_divisibility(self): + return self._size_divisibility + + def forward(self, x): + """ + Args: + input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to + feature map tensor for each feature level in high to low resolution order. + + Returns: + dict[str->Tensor]: + mapping from feature map name to FPN feature map tensor + in high to low resolution order. Returned feature names follow the FPN + paper convention: "p", where stage has stride = 2 ** stage e.g., + ["p2", "p3", ..., "p6"]. 
+ """ + bottom_up_features = self.bottom_up(x) + results = [] + prev_features = self.lateral_convs[0](bottom_up_features[self.in_features[-1]]) + results.append(self.output_convs[0](prev_features)) + + # Reverse feature maps into top-down order (from low to high resolution) + for idx, (lateral_conv, output_conv) in enumerate( + zip(self.lateral_convs, self.output_convs) + ): + # Slicing of ModuleList is not supported https://github.com/pytorch/pytorch/issues/47336 + # Therefore we loop over all modules but skip the first one + if idx > 0: + features = self.in_features[-idx - 1] + features = bottom_up_features[features] + top_down_features = F.interpolate(prev_features, scale_factor=2.0, mode="nearest") + lateral_features = lateral_conv(features) + prev_features = lateral_features + top_down_features + if self._fuse_type == "avg": + prev_features /= 2 + results.insert(0, output_conv(prev_features)) + + if self.top_block is not None: + if self.top_block.in_feature in bottom_up_features: + top_block_in_feature = bottom_up_features[self.top_block.in_feature] + else: + top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] + results.extend(self.top_block(top_block_in_feature)) + assert len(self._out_features) == len(results) + return {f: res for f, res in zip(self._out_features, results)} + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + +def _assert_strides_are_log2_contiguous(strides): + """ + Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". + """ + for i, stride in enumerate(strides[1:], 1): + assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( + stride, strides[i - 1] + ) + + +class LastLevelMaxPool(nn.Module): + """ + This module is used in the original FPN to generate a downsampled + P6 feature from P5. + """ + + def __init__(self): + super().__init__() + self.num_levels = 1 + self.in_feature = "p5" + + def forward(self, x): + return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] + + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7 from + C5 feature. + """ + + def __init__(self, in_channels, out_channels, in_feature="res5"): + super().__init__() + self.num_levels = 2 + self.in_feature = in_feature + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + weight_init.c2_xavier_fill(module) + + def forward(self, c5): + p6 = self.p6(c5) + p7 = self.p7(F.relu(p6)) + return [p6, p7] + + +@BACKBONE_REGISTRY.register() +def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
+ """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + in_channels_p6p7 = bottom_up.output_shape()["res5"].channels + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelP6P7(in_channels_p6p7, out_channels), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/fpn_swin.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/fpn_swin.py new file mode 100644 index 0000000000000000000000000000000000000000..dc38cfe2a1e65a95ac86555e42d8182384345a44 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/fpn_swin.py @@ -0,0 +1,600 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import Conv2d, ShapeSpec, get_norm + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY +from .resnet import build_resnet_backbone +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +import numpy as np + +__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] + +class Mlp(nn.Module): + """ Multilayer perceptron.""" + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """ Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ Forward function. + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """ Swin Transformer Block. + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. 
Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. + """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + +class swin_layer(nn.Module): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. 
Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + +class FPN(Backbone): + """ + This module implements :paper:`FPN`. + It creates pyramid features built on top of some input feature maps. + """ + + _fuse_type: torch.jit.Final[str] + + def __init__( + self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum" + ): + """ + Args: + bottom_up (Backbone): module representing the bottom up subnetwork. + Must be a subclass of :class:`Backbone`. The multi-scale feature + maps generated by the bottom up network, and listed in `in_features`, + are used to generate FPN levels. + in_features (list[str]): names of the input feature maps coming + from the backbone to which FPN is attached. For example, if the + backbone produces ["res2", "res3", "res4"], any *contiguous* sublist + of these may be used; order must be from high to low resolution. + out_channels (int): number of channels in the output feature maps. 
+ norm (str): the normalization to use. + top_block (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + FPN output, and the result will extend the result list. The top_block + further downsamples the feature map. It must have an attribute + "num_levels", meaning the number of extra FPN levels added by + this block, and "in_feature", which is a string representing + its input feature (e.g., p5). + fuse_type (str): types for fusing the top down features and the lateral + ones. It can be "sum" (default), which sums up element-wise; or "avg", + which takes the element-wise mean of the two. + """ + super(FPN, self).__init__() + assert isinstance(bottom_up, Backbone) + assert in_features, in_features + + # Feature map strides and channels from the bottom up network (e.g. ResNet) + input_shapes = bottom_up.output_shape() + strides = [input_shapes[f].stride for f in in_features] + in_channels_per_feature = [input_shapes[f].channels for f in in_features] + + _assert_strides_are_log2_contiguous(strides) + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(in_channels_per_feature): + lateral_norm = get_norm(norm, out_channels) + output_norm = get_norm(norm, out_channels) + + lateral_conv = Conv2d( + in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + # output_conv = Conv2d( + # out_channels, + # out_channels, + # kernel_size=3, + # stride=1, + # padding=1, + # bias=use_bias, + # norm=output_norm, + # ) + output_conv = swin_layer( dim=out_channels, + depth=1, + num_heads=2, + window_size=7) + self.out_channels = out_channels + weight_init.c2_xavier_fill(lateral_conv) + # weight_init.c2_xavier_fill(output_conv) + stage = int(math.log2(strides[idx])) + self.add_module("fpn_lateral{}".format(stage), lateral_conv) + self.add_module("fpn_output{}".format(stage), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. + self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + self.top_block = top_block + self.in_features = tuple(in_features) + self.bottom_up = bottom_up + # Return feature names are "p", like ["p2", "p3", ..., "p6"] + self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} + # top block output feature maps. + if self.top_block is not None: + for s in range(stage, stage + self.top_block.num_levels): + self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) + + self._out_features = list(self._out_feature_strides.keys()) + self._out_feature_channels = {k: out_channels for k in self._out_features} + self._size_divisibility = strides[-1] + assert fuse_type in {"avg", "sum"} + self._fuse_type = fuse_type + + @property + def size_divisibility(self): + return self._size_divisibility + + def forward(self, x): + """ + Args: + input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to + feature map tensor for each feature level in high to low resolution order. + + Returns: + dict[str->Tensor]: + mapping from feature map name to FPN feature map tensor + in high to low resolution order. Returned feature names follow the FPN + paper convention: "p", where stage has stride = 2 ** stage e.g., + ["p2", "p3", ..., "p6"]. 
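+        Note: as implemented here, ``x`` is the raw network input that is passed
+        straight to ``self.bottom_up``; the bottom-up feature dict described
+        above is computed inside this method rather than supplied by the caller.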
+ """ + bottom_up_features = self.bottom_up(x) + results = [] + prev_features = self.lateral_convs[0](bottom_up_features[self.in_features[-1]]) + B, C, Wh, Ww = prev_features.size() + prev_features = prev_features.flatten(2).transpose(1, 2) + x_out, H, W, x, Wh, Ww = self.output_convs[0](prev_features, Wh, Ww) + prev_features = x_out.transpose(1, 2).view(-1, self.out_channels, Wh, Ww) + results.append(prev_features) + # Reverse feature maps into top-down order (from low to high resolution) + for idx, (lateral_conv, output_conv) in enumerate( + zip(self.lateral_convs, self.output_convs) + ): + # Slicing of ModuleList is not supported https://github.com/pytorch/pytorch/issues/47336 + # Therefore we loop over all modules but skip the first one + if idx > 0: + features = self.in_features[-idx - 1] + features = bottom_up_features[features] + top_down_features = F.interpolate(prev_features, scale_factor=2.0, mode="nearest") + lateral_features = lateral_conv(features) + prev_features = lateral_features + top_down_features + if self._fuse_type == "avg": + prev_features /= 2 + B, C, Wh, Ww = prev_features.size() + prev_features = prev_features.flatten(2).transpose(1, 2) + x_out, H, W, x, Wh, Ww = self.output_convs[0](prev_features, Wh, Ww) + prev_features = x_out.transpose(1, 2).view(-1, self.out_channels, Wh, Ww) + results.insert(0, prev_features) + + if self.top_block is not None: + if self.top_block.in_feature in bottom_up_features: + top_block_in_feature = bottom_up_features[self.top_block.in_feature] + else: + top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] + results.extend(self.top_block(top_block_in_feature)) + assert len(self._out_features) == len(results) + return {f: res for f, res in zip(self._out_features, results)} + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + +def _assert_strides_are_log2_contiguous(strides): + """ + Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". + """ + for i, stride in enumerate(strides[1:], 1): + assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( + stride, strides[i - 1] + ) + + +class LastLevelMaxPool(nn.Module): + """ + This module is used in the original FPN to generate a downsampled + P6 feature from P5. + """ + + def __init__(self): + super().__init__() + self.num_levels = 1 + self.in_feature = "p5" + + def forward(self, x): + return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] + + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7 from + C5 feature. + """ + + def __init__(self, in_channels, out_channels, in_feature="res5"): + super().__init__() + self.num_levels = 2 + self.in_feature = in_feature + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + weight_init.c2_xavier_fill(module) + + def forward(self, c5): + p6 = self.p6(c5) + p7 = self.p7(F.relu(p6)) + return [p6, p7] + + +@BACKBONE_REGISTRY.register() +def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
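+        Note: the FPN built here replaces the usual 3x3 output convolutions with
+        single-depth Swin Transformer blocks (``swin_layer`` above), and as
+        written ``FPN.forward`` routes every pyramid level through
+        ``self.output_convs[0]``, so that block is shared across levels.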
+ """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + in_channels_p6p7 = bottom_up.output_shape()["res5"].channels + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelP6P7(in_channels_p6p7, out_channels), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/resnet.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..85bcf7f45b7e861ec027c6d677b28e7dd713931c --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/resnet.py @@ -0,0 +1,693 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import ( + CNNBlockBase, + Conv2d, + DeformConv, + ModulatedDeformConv, + ShapeSpec, + get_norm, +) + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY + +__all__ = [ + "ResNetBlockBase", + "BasicBlock", + "BottleneckBlock", + "DeformBottleneckBlock", + "BasicStem", + "ResNet", + "make_stage", + "build_resnet_backbone", +] + + +class BasicBlock(CNNBlockBase): + """ + The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, + with two 3x3 conv layers and a projection shortcut if needed. + """ + + def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"): + """ + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + stride (int): Stride for the first conv. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. 
+ """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + self.conv2 = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + out = self.conv2(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BottleneckBlock(CNNBlockBase): + """ + The standard bottleneck residual block used by ResNet-50, 101 and 152 + defined in :paper:`ResNet`. It contains 3 conv layers with kernels + 1x1, 3x3, 1x1, and a projection shortcut if needed. + """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + ): + """ + Args: + bottleneck_channels (int): number of output channels for the 3x3 + "bottleneck" conv layers. + num_groups (int): number of groups for the 3x3 conv layer. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + stride_in_1x1 (bool): when stride>1, whether to put stride in the + first 1x1 convolution or the bottleneck 3x3 convolution. + dilation (int): the dilation rate of the 3x3 conv layer. + """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + # The original MSRA ResNet models have stride in the first 1x1 conv + # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have + # stride in the 3x3 conv + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv2 = Conv2d( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + # Zero-initialize the last normalization in each residual branch, + # so that at the beginning, the residual branch starts with zeros, + # and each residual block behaves like an identity. + # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "For BN layers, the learnable scaling coefficient γ is initialized + # to be 1, except for each residual block's last BN + # where γ is initialized to be 0." 
+ + # nn.init.constant_(self.conv3.norm.weight, 0) + # TODO this somehow hurts performance when training GN models from scratch. + # Add it as an option when we need to use this code to train a backbone. + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + out = self.conv2(out) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class DeformBottleneckBlock(CNNBlockBase): + """ + Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv ` + in the 3x3 convolution. + """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + deform_modulated=False, + deform_num_groups=1, + ): + super().__init__(in_channels, out_channels, stride) + self.deform_modulated = deform_modulated + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + if deform_modulated: + deform_conv_op = ModulatedDeformConv + # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size + offset_channels = 27 + else: + deform_conv_op = DeformConv + offset_channels = 18 + + self.conv2_offset = Conv2d( + bottleneck_channels, + offset_channels * deform_num_groups, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + dilation=dilation, + ) + self.conv2 = deform_conv_op( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + deformable_groups=deform_num_groups, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + nn.init.constant_(self.conv2_offset.weight, 0) + nn.init.constant_(self.conv2_offset.bias, 0) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + if self.deform_modulated: + offset_mask = self.conv2_offset(out) + offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) + offset = torch.cat((offset_x, offset_y), dim=1) + mask = mask.sigmoid() + out = self.conv2(out, offset, mask) + else: + offset = self.conv2_offset(out) + out = self.conv2(out, offset) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BasicStem(CNNBlockBase): + """ + The standard ResNet stem (layers before the first residual block). + """ + + def __init__(self, in_channels=3, out_channels=64, norm="BN"): + """ + Args: + norm (str or callable): norm after the first conv layer. + See :func:`layers.get_norm` for supported format. 
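+        Note: the stem downsamples the input by an overall stride of 4
+        (a stride-2 7x7 convolution followed by a stride-2 max pool).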
+ """ + super().__init__(in_channels, out_channels, 4) + self.in_channels = in_channels + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False, + norm=get_norm(norm, out_channels), + ) + weight_init.c2_msra_fill(self.conv1) + + def forward(self, x): + x = self.conv1(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +class ResNet(Backbone): + """ + Implement :paper:`ResNet`. + """ + + def __init__(self, stem, stages, num_classes=None, out_features=None, freeze_at=0): + """ + Args: + stem (nn.Module): a stem module + stages (list[list[CNNBlockBase]]): several (typically 4) stages, + each contains multiple :class:`CNNBlockBase`. + num_classes (None or int): if None, will not perform classification. + Otherwise, will create a linear layer. + out_features (list[str]): name of the layers whose outputs should + be returned in forward. Can be anything in "stem", "linear", or "res2" ... + If None, will return the output of the last layer. + freeze_at (int): The number of stages at the beginning to freeze. + see :meth:`freeze` for detailed explanation. + """ + super().__init__() + self.stem = stem + self.num_classes = num_classes + + current_stride = self.stem.stride + self._out_feature_strides = {"stem": current_stride} + self._out_feature_channels = {"stem": self.stem.out_channels} + + self.stage_names, self.stages = [], [] + + if out_features is not None: + # Avoid keeping unused layers in this module. They consume extra memory + # and may cause allreduce to fail + num_stages = max( + [{"res2": 1, "res3": 2, "res4": 3, "res5": 4}.get(f, 0) for f in out_features] + ) + stages = stages[:num_stages] + for i, blocks in enumerate(stages): + assert len(blocks) > 0, len(blocks) + for block in blocks: + assert isinstance(block, CNNBlockBase), block + + name = "res" + str(i + 2) + stage = nn.Sequential(*blocks) + + self.add_module(name, stage) + self.stage_names.append(name) + self.stages.append(stage) + + self._out_feature_strides[name] = current_stride = int( + current_stride * np.prod([k.stride for k in blocks]) + ) + self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels + self.stage_names = tuple(self.stage_names) # Make it static for scripting + + if num_classes is not None: + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.linear = nn.Linear(curr_channels, num_classes) + + # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "The 1000-way fully-connected layer is initialized by + # drawing weights from a zero-mean Gaussian with standard deviation of 0.01." + nn.init.normal_(self.linear.weight, std=0.01) + name = "linear" + + if out_features is None: + out_features = [name] + self._out_features = out_features + assert len(self._out_features) + children = [x[0] for x in self.named_children()] + for out_feature in self._out_features: + assert out_feature in children, "Available children: {}".format(", ".join(children)) + self.freeze(freeze_at) + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. + + Returns: + dict[str->Tensor]: names and the corresponding features + """ + assert x.dim() == 4, f"ResNet takes an input of shape (N, C, H, W). Got {x.shape} instead!" 
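+        # Run the stem first, then each residual stage in order, keeping only
+        # the outputs whose names were requested via ``out_features``.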
+ outputs = {} + x = self.stem(x) + if "stem" in self._out_features: + outputs["stem"] = x + for name, stage in zip(self.stage_names, self.stages): + x = stage(x) + if name in self._out_features: + outputs[name] = x + if self.num_classes is not None: + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.linear(x) + if "linear" in self._out_features: + outputs["linear"] = x + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + def freeze(self, freeze_at=0): + """ + Freeze the first several stages of the ResNet. Commonly used in + fine-tuning. + + Layers that produce the same feature map spatial size are defined as one + "stage" by :paper:`FPN`. + + Args: + freeze_at (int): number of stages to freeze. + `1` means freezing the stem. `2` means freezing the stem and + one residual stage, etc. + + Returns: + nn.Module: this ResNet itself + """ + if freeze_at >= 1: + self.stem.freeze() + for idx, stage in enumerate(self.stages, start=2): + if freeze_at >= idx: + for block in stage.children(): + block.freeze() + return self + + @staticmethod + def make_stage(block_class, num_blocks, *, in_channels, out_channels, **kwargs): + """ + Create a list of blocks of the same type that forms one ResNet stage. + + Args: + block_class (type): a subclass of CNNBlockBase that's used to create all blocks in this + stage. A module of this type must not change spatial resolution of inputs unless its + stride != 1. + num_blocks (int): number of blocks in this stage + in_channels (int): input channels of the entire stage. + out_channels (int): output channels of **every block** in the stage. + kwargs: other arguments passed to the constructor of + `block_class`. If the argument name is "xx_per_block", the + argument is a list of values to be passed to each block in the + stage. Otherwise, the same argument is passed to every block + in the stage. + + Returns: + list[CNNBlockBase]: a list of block module. + + Examples: + :: + stage = ResNet.make_stage( + BottleneckBlock, 3, in_channels=16, out_channels=64, + bottleneck_channels=16, num_groups=1, + stride_per_block=[2, 1, 1], + dilations_per_block=[1, 1, 2] + ) + + Usually, layers that produce the same feature map spatial size are defined as one + "stage" (in :paper:`FPN`). Under such definition, ``stride_per_block[1:]`` should + all be 1. + """ + blocks = [] + for i in range(num_blocks): + curr_kwargs = {} + for k, v in kwargs.items(): + if k.endswith("_per_block"): + assert len(v) == num_blocks, ( + f"Argument '{k}' of make_stage should have the " + f"same length as num_blocks={num_blocks}." + ) + newk = k[: -len("_per_block")] + assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!" + curr_kwargs[newk] = v[i] + else: + curr_kwargs[k] = v + + blocks.append( + block_class(in_channels=in_channels, out_channels=out_channels, **curr_kwargs) + ) + in_channels = out_channels + return blocks + + @staticmethod + def make_default_stages(depth, block_class=None, **kwargs): + """ + Created list of ResNet stages from pre-defined depth (one of 18, 34, 50, 101, 152). + If it doesn't create the ResNet variant you need, please use :meth:`make_stage` + instead for fine-grained customization. + + Args: + depth (int): depth of ResNet + block_class (type): the CNN block class. Has to accept + `bottleneck_channels` argument for depth > 50. + By default it is BasicBlock or BottleneckBlock, based on the + depth. 
+ kwargs: + other arguments to pass to `make_stage`. Should not contain + stride and channels, as they are predefined for each depth. + + Returns: + list[list[CNNBlockBase]]: modules in all stages; see arguments of + :class:`ResNet.__init__`. + """ + num_blocks_per_stage = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + }[depth] + if block_class is None: + block_class = BasicBlock if depth < 50 else BottleneckBlock + if depth < 50: + in_channels = [64, 64, 128, 256] + out_channels = [64, 128, 256, 512] + else: + in_channels = [64, 256, 512, 1024] + out_channels = [256, 512, 1024, 2048] + ret = [] + for (n, s, i, o) in zip(num_blocks_per_stage, [1, 2, 2, 2], in_channels, out_channels): + if depth >= 50: + kwargs["bottleneck_channels"] = o // 4 + ret.append( + ResNet.make_stage( + block_class=block_class, + num_blocks=n, + stride_per_block=[s] + [1] * (n - 1), + in_channels=i, + out_channels=o, + **kwargs, + ) + ) + return ret + + +ResNetBlockBase = CNNBlockBase +""" +Alias for backward compatibiltiy. +""" + + +def make_stage(*args, **kwargs): + """ + Deprecated alias for backward compatibiltiy. + """ + return ResNet.make_stage(*args, **kwargs) + + +@BACKBONE_REGISTRY.register() +def build_resnet_backbone(cfg, input_shape): + """ + Create a ResNet instance from config. + + Returns: + ResNet: a :class:`ResNet` instance. + """ + # need registration of new blocks/stems? + norm = cfg.MODEL.RESNETS.NORM + stem = BasicStem( + in_channels=input_shape.channels, + out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, + norm=norm, + ) + + # fmt: off + freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT + out_features = cfg.MODEL.RESNETS.OUT_FEATURES + depth = cfg.MODEL.RESNETS.DEPTH + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group + in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION + deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE + deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED + deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS + # fmt: on + assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) + + num_blocks_per_stage = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + }[depth] + + if depth in [18, 34]: + assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34" + assert not any( + deform_on_per_stage + ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34" + assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34" + assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34" + + stages = [] + + for idx, stage_idx in enumerate(range(2, 6)): + # res5_dilation is used this way as a convention in R-FCN & Deformable Conv paper + dilation = res5_dilation if stage_idx == 5 else 1 + first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 + stage_kargs = { + "num_blocks": num_blocks_per_stage[idx], + "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), + "in_channels": in_channels, + "out_channels": out_channels, + "norm": norm, + } + # Use BasicBlock for R18 and R34. 
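+        # Deeper variants use BottleneckBlock (or DeformBottleneckBlock when
+        # deformable conv is enabled for that stage); channel widths double
+        # from one stage to the next at the end of this loop.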
+ if depth in [18, 34]: + stage_kargs["block_class"] = BasicBlock + else: + stage_kargs["bottleneck_channels"] = bottleneck_channels + stage_kargs["stride_in_1x1"] = stride_in_1x1 + stage_kargs["dilation"] = dilation + stage_kargs["num_groups"] = num_groups + if deform_on_per_stage[idx]: + stage_kargs["block_class"] = DeformBottleneckBlock + stage_kargs["deform_modulated"] = deform_modulated + stage_kargs["deform_num_groups"] = deform_num_groups + else: + stage_kargs["block_class"] = BottleneckBlock + blocks = ResNet.make_stage(**stage_kargs) + in_channels = out_channels + out_channels *= 2 + bottleneck_channels *= 2 + stages.append(blocks) + return ResNet(stem, stages, out_features=out_features, freeze_at=freeze_at) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/swin_transformer.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/swin_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..c6dea49a72439a7909a4c880068432c34cec8452 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/backbone/swin_transformer.py @@ -0,0 +1,725 @@ +# -------------------------------------------------------- +# Swin Transformer +# modified from https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/master/mmdet/models/backbones/swin_transformer.py +# -------------------------------------------------------- + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +import numpy as np +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from . import Backbone +from .build import BACKBONE_REGISTRY +from .fpn import FPN, LastLevelMaxPool, LastLevelP6P7 +from detectron2.layers import ShapeSpec + + +class Mlp(nn.Module): + """ Multilayer perceptron.""" + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """ Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. 
+ num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ Forward function. + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """ Swin Transformer Block. + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. 
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. + """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchMerging(nn.Module): + """ Patch Merging Layer + Args: + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + def __init__(self, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x, H, W): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + x = x.view(B, H, W, C) + + # padding + pad_input = (H % 2 == 1) or (W % 2 == 1) + if pad_input: + x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + +class BasicLayer(nn.Module): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. 
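+        Returns:
+            The stage output ``(x, H, W)`` followed by the features handed to the
+            next stage ``(x_down, Wh, Ww)``; when this layer has no downsample
+            module, the same tensor and resolution are returned twice.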
+ """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + Args: + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + patch_size = to_2tuple(patch_size) + self.patch_size = patch_size + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + """Forward function.""" + # padding + _, _, H, W = x.size() + if W % self.patch_size[1] != 0: + x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1])) + if H % self.patch_size[0] != 0: + x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) + + x = self.proj(x) # B C Wh Ww + if self.norm is not None: + Wh, Ww = x.size(2), x.size(3) + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) + + return x + + +class SwinTransformer(Backbone): + """ Swin Transformer backbone. + A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - + https://arxiv.org/pdf/2103.14030 + Args: + pretrain_img_size (int): Input image size for training the pretrained model, + used in absolute postion embedding. Default 224. + patch_size (int | tuple(int)): Patch size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + depths (tuple[int]): Depths of each Swin Transformer stage. + num_heads (tuple[int]): Number of attention head of each stage. + window_size (int): Window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): Dropout rate. + attn_drop_rate (float): Attention dropout rate. 
Default: 0. + drop_path_rate (float): Stochastic depth rate. Default: 0.2. + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False. + patch_norm (bool): If True, add normalization after patch embedding. Default: True. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + pretrain_img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + frozen_stages=-1, + use_checkpoint=False, + out_features=None): + super(SwinTransformer, self).__init__() + + self.pretrain_img_size = pretrain_img_size + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.frozen_stages = frozen_stages + + self.out_features = out_features + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + pretrain_img_size = to_2tuple(pretrain_img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [pretrain_img_size[0] // patch_size[0], pretrain_img_size[1] // patch_size[1]] + + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1])) + trunc_normal_(self.absolute_pos_embed, std=.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + self._out_feature_strides = {} + self._out_feature_channels = {} + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(embed_dim * 2 ** i_layer), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint) + self.layers.append(layer) + + stage = f'stage{i_layer+2}' + if stage in self.out_features: + self._out_feature_channels[stage] = embed_dim * 2 ** i_layer + self._out_feature_strides[stage] = 4 * 2 ** i_layer + + num_features = [int(embed_dim * 2 ** i) for i in range(self.num_layers)] + self.num_features = num_features + + # add a norm layer for each output + for i_layer in range(self.num_layers): + stage = f'stage{i_layer+2}' + if stage in self.out_features: + layer = norm_layer(num_features[i_layer]) + layer_name = f'norm{i_layer}' + self.add_module(layer_name, layer) + + self._freeze_stages() + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + + if self.frozen_stages >= 1 and self.ape: + self.absolute_pos_embed.requires_grad = False + + if self.frozen_stages >= 2: + 
self.pos_drop.eval() + for i in range(0, self.frozen_stages - 1): + m = self.layers[i] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) + + def forward(self, x): + """Forward function.""" + x = self.patch_embed(x) + + Wh, Ww = x.size(2), x.size(3) + if self.ape: + # interpolate the position embedding to the corresponding size + absolute_pos_embed = F.interpolate(self.absolute_pos_embed, size=(Wh, Ww), mode='bicubic') + x = (x + absolute_pos_embed).flatten(2).transpose(1, 2) # B Wh*Ww C + else: + x = x.flatten(2).transpose(1, 2) + x = self.pos_drop(x) + + outs = {} + for i in range(self.num_layers): + layer = self.layers[i] + x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww) + name = f'stage{i+2}' + if name in self.out_features: + norm_layer = getattr(self, f'norm{i}') + x_out = norm_layer(x_out) + out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous() + outs[name] = out + + return outs #{"stage%d" % (i+2,): out for i, out in enumerate(outs)} #tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep layers freezed.""" + super(SwinTransformer, self).train(mode) + self._freeze_stages() + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self.out_features + } + +@BACKBONE_REGISTRY.register() +def build_swint_backbone(cfg, input_shape): + """ + Create a SwinT instance from config. + + Returns: + VoVNet: a :class:`VoVNet` instance. + """ + out_features = cfg.MODEL.SWINT.OUT_FEATURES + + return SwinTransformer( + patch_size=4, + in_chans=input_shape.channels, + embed_dim=cfg.MODEL.SWINT.EMBED_DIM, + depths=cfg.MODEL.SWINT.DEPTHS, + num_heads=cfg.MODEL.SWINT.NUM_HEADS, + window_size=cfg.MODEL.SWINT.WINDOW_SIZE, + mlp_ratio=cfg.MODEL.SWINT.MLP_RATIO, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=cfg.MODEL.SWINT.DROP_PATH_RATE, + norm_layer=nn.LayerNorm, + ape=cfg.MODEL.SWINT.APE, + patch_norm=True, + frozen_stages=cfg.MODEL.BACKBONE.FREEZE_AT, + out_features=out_features + ) + + +@BACKBONE_REGISTRY.register() +def build_swint_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
+ """ + bottom_up = build_swint_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + +class LastLevelP6(nn.Module): + """ + This module is used in FCOS to generate extra layers + """ + + def __init__(self, in_channels, out_channels, in_features="res5"): + super().__init__() + self.num_levels = 1 + self.in_feature = in_features + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + for module in [self.p6]: + weight_init.c2_xavier_fill(module) + + def forward(self, x): + p6 = self.p6(x) + return [p6] + +@BACKBONE_REGISTRY.register() +def build_retinanet_swint_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_swint_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + top_levels = cfg.MODEL.FPN.TOP_LEVELS + in_channels_top = out_channels + if top_levels == 2: + top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") + if top_levels == 1: + top_block = LastLevelP6(in_channels_top, out_channels, "p5") + elif top_levels == 0: + top_block = None + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=top_block, + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/box_regression.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/box_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..12be0008b66bd4954a5139aeb6e07d71f8159caa --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/box_regression.py @@ -0,0 +1,270 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List, Tuple +import torch +from fvcore.nn import giou_loss, smooth_l1_loss + +from detectron2.layers import cat +from detectron2.structures import Boxes + +# Value for clamping large dw and dh predictions. The heuristic is that we clamp +# such that dw and dh are no larger than what would transform a 16px box into a +# 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). +_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) + + +__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated"] + + +@torch.jit.script +class Box2BoxTransform(object): + """ + The box-to-box transform defined in R-CNN. The transformation is parameterized + by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height + by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). + """ + + def __init__( + self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP + ): + """ + Args: + weights (4-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set + such that the deltas have unit variance; now they are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. 
+ """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): source boxes, e.g., object proposals + target_boxes (Tensor): target of the transformation, e.g., ground-truth + boxes. + """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_widths = src_boxes[:, 2] - src_boxes[:, 0] + src_heights = src_boxes[:, 3] - src_boxes[:, 1] + src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths + src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights + + target_widths = target_boxes[:, 2] - target_boxes[:, 0] + target_heights = target_boxes[:, 3] - target_boxes[:, 1] + target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths + target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights + + wx, wy, ww, wh = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + + deltas = torch.stack((dx, dy, dw, dh), dim=1) + assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. + deltas[i] represents k potentially different class-specific + box transformations for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 4) + """ + deltas = deltas.float() # ensure fp32 for decoding precision + boxes = boxes.to(deltas.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights + dx = deltas[:, 0::4] / wx + dy = deltas[:, 1::4] / wy + dw = deltas[:, 2::4] / ww + dh = deltas[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + x1 = pred_ctr_x - 0.5 * pred_w + y1 = pred_ctr_y - 0.5 * pred_h + x2 = pred_ctr_x + 0.5 * pred_w + y2 = pred_ctr_y + 0.5 * pred_h + pred_boxes = torch.stack((x1, y1, x2, y2), dim=-1) + return pred_boxes.reshape(deltas.shape) + + +@torch.jit.script +class Box2BoxTransformRotated(object): + """ + The box-to-box transform defined in Rotated R-CNN. The transformation is parameterized + by 5 deltas: (dx, dy, dw, dh, da). The transformation scales the box's width and height + by exp(dw), exp(dh), shifts a box's center by the offset (dx * width, dy * height), + and rotate a box's angle by da (radians). + Note: angles of deltas are in radians while angles of boxes are in degrees. + """ + + def __init__( + self, + weights: Tuple[float, float, float, float, float], + scale_clamp: float = _DEFAULT_SCALE_CLAMP, + ): + """ + Args: + weights (5-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh, da) deltas. 
These are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. + """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh, da) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): Nx5 source boxes, e.g., object proposals + target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth + boxes. + """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_ctr_x, src_ctr_y, src_widths, src_heights, src_angles = torch.unbind(src_boxes, dim=1) + + target_ctr_x, target_ctr_y, target_widths, target_heights, target_angles = torch.unbind( + target_boxes, dim=1 + ) + + wx, wy, ww, wh, wa = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + # Angles of deltas are in radians while angles of boxes are in degrees. + # the conversion to radians serve as a way to normalize the values + da = target_angles - src_angles + da = (da + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + da *= wa * math.pi / 180.0 + + deltas = torch.stack((dx, dy, dw, dh, da), dim=1) + assert ( + (src_widths > 0).all().item() + ), "Input boxes to Box2BoxTransformRotated are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*5). + deltas[i] represents box transformation for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 5) + """ + assert deltas.shape[1] % 5 == 0 and boxes.shape[1] == 5 + + boxes = boxes.to(deltas.dtype).unsqueeze(2) + + ctr_x = boxes[:, 0] + ctr_y = boxes[:, 1] + widths = boxes[:, 2] + heights = boxes[:, 3] + angles = boxes[:, 4] + + wx, wy, ww, wh, wa = self.weights + + dx = deltas[:, 0::5] / wx + dy = deltas[:, 1::5] / wy + dw = deltas[:, 2::5] / ww + dh = deltas[:, 3::5] / wh + da = deltas[:, 4::5] / wa + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::5] = dx * widths + ctr_x # x_ctr + pred_boxes[:, 1::5] = dy * heights + ctr_y # y_ctr + pred_boxes[:, 2::5] = torch.exp(dw) * widths # width + pred_boxes[:, 3::5] = torch.exp(dh) * heights # height + + # Following original RRPN implementation, + # angles of deltas are in radians while angles of boxes are in degrees. + pred_angle = da * 180.0 / math.pi + angles + pred_angle = (pred_angle + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + + pred_boxes[:, 4::5] = pred_angle + + return pred_boxes + + +def _dense_box_regression_loss( + anchors: List[Boxes], + box2box_transform: Box2BoxTransform, + pred_anchor_deltas: List[torch.Tensor], + gt_boxes: List[torch.Tensor], + fg_mask: torch.Tensor, + box_reg_loss_type="smooth_l1", + smooth_l1_beta=0.0, +): + """ + Compute loss for dense multi-level box regression. 
+ Loss is accumulated over ``fg_mask``. + + Args: + anchors: #lvl anchor boxes, each is (HixWixA, 4) + pred_anchor_deltas: #lvl predictions, each is (N, HixWixA, 4) + gt_boxes: N ground truth boxes, each has shape (R, 4) (R = sum(Hi * Wi * A)) + fg_mask: the foreground boolean mask of shape (N, R) to compute loss on + box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou". + smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to + use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1" + """ + anchors = type(anchors[0]).cat(anchors).tensor # (R, 4) + if box_reg_loss_type == "smooth_l1": + gt_anchor_deltas = [box2box_transform.get_deltas(anchors, k) for k in gt_boxes] + gt_anchor_deltas = torch.stack(gt_anchor_deltas) # (N, R, 4) + loss_box_reg = smooth_l1_loss( + cat(pred_anchor_deltas, dim=1)[fg_mask], + gt_anchor_deltas[fg_mask], + beta=smooth_l1_beta, + reduction="sum", + ) + elif box_reg_loss_type == "giou": + pred_boxes = [ + box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) + ] + loss_box_reg = giou_loss( + torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" + ) + else: + raise ValueError(f"Invalid dense box regression loss type '{box_reg_loss_type}'") + return loss_box_reg diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/matcher.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..666913f76fb0b9d8a277541716f91872d8246250 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/matcher.py @@ -0,0 +1,126 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch + +from detectron2.layers import nonzero_tuple + + +class Matcher(object): + """ + This class assigns to each predicted "element" (e.g., a box) a ground-truth + element. Each predicted element will have exactly zero or one matches; each + ground-truth element may be matched to zero or more predicted elements. + + The matching is determined by the MxN match_quality_matrix, that characterizes + how well each (ground-truth, prediction)-pair match each other. For example, + if the elements are boxes, this matrix may contain box intersection-over-union + overlap values. + + The matcher returns (a) a vector of length N containing the index of the + ground-truth element m in [0, M) that matches to prediction n in [0, N). + (b) a vector of length N containing the labels for each prediction. + """ + + def __init__( + self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False + ): + """ + Args: + thresholds (list): a list of thresholds used to stratify predictions + into levels. + labels (list): a list of values to label predictions belonging at + each level. A label can be one of {-1, 0, 1} signifying + {ignore, negative class, positive class}, respectively. + allow_low_quality_matches (bool): if True, produce additional matches + for predictions with maximum match quality lower than high_threshold. + See set_low_quality_matches_ for more details. + + For example, + thresholds = [0.3, 0.5] + labels = [0, -1, 1] + All predictions with iou < 0.3 will be marked with 0 and + thus will be considered as false positives while training. + All predictions with 0.3 <= iou < 0.5 will be marked with -1 and + thus will be ignored. 
+ All predictions with 0.5 <= iou will be marked with 1 and + thus will be considered as true positives. + """ + # Add -inf and +inf to first and last position in thresholds + thresholds = thresholds[:] + assert thresholds[0] > 0 + thresholds.insert(0, -float("inf")) + thresholds.append(float("inf")) + # Currently torchscript does not support all + generator + assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) + assert all([l in [-1, 0, 1] for l in labels]) + assert len(labels) == len(thresholds) - 1 + self.thresholds = thresholds + self.labels = labels + self.allow_low_quality_matches = allow_low_quality_matches + + def __call__(self, match_quality_matrix): + """ + Args: + match_quality_matrix (Tensor[float]): an MxN tensor, containing the + pairwise quality between M ground-truth elements and N predicted + elements. All elements must be >= 0 (due to the us of `torch.nonzero` + for selecting indices in :meth:`set_low_quality_matches_`). + + Returns: + matches (Tensor[int64]): a vector of length N, where matches[i] is a matched + ground-truth index in [0, M) + match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates + whether a prediction is a true or false positive or ignored + """ + assert match_quality_matrix.dim() == 2 + if match_quality_matrix.numel() == 0: + default_matches = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), 0, dtype=torch.int64 + ) + # When no gt boxes exist, we define IOU = 0 and therefore set labels + # to `self.labels[0]`, which usually defaults to background class 0 + # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds + default_match_labels = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 + ) + return default_matches, default_match_labels + + assert torch.all(match_quality_matrix >= 0) + + # match_quality_matrix is M (gt) x N (predicted) + # Max over gt elements (dim 0) to find best gt candidate for each prediction + matched_vals, matches = match_quality_matrix.max(dim=0) + + match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) + + for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): + low_high = (matched_vals >= low) & (matched_vals < high) + match_labels[low_high] = l + + if self.allow_low_quality_matches: + self.set_low_quality_matches_(match_labels, match_quality_matrix) + + return matches, match_labels + + def set_low_quality_matches_(self, match_labels, match_quality_matrix): + """ + Produce additional matches for predictions that have only low-quality matches. + Specifically, for each ground-truth G find the set of predictions that have + maximum overlap with it (including ties); for each prediction in that set, if + it is unmatched, then match it to the ground-truth G. + + This function implements the RPN assignment case (i) in Sec. 3.1.2 of + :paper:`Faster R-CNN`. + """ + # For each gt, find the prediction with which it has highest quality + highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) + # Find the highest quality match available, even if it is low, including ties. + # Note that the matches qualities must be positive due to the use of + # `torch.nonzero`. + _, pred_inds_with_highest_quality = nonzero_tuple( + match_quality_matrix == highest_quality_foreach_gt[:, None] + ) + # If an anchor was labeled positive only due to a low-quality match + # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. 
+ # This follows the implementation in Detectron, and is found to have no significant impact. + match_labels[pred_inds_with_highest_quality] = 1 diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cb86ccfd7415db649fa414507992497e542c0b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +from .build import META_ARCH_REGISTRY, build_model # isort:skip + +from .panoptic_fpn import PanopticFPN + +# import all the meta_arch, so they will be registered +from .rcnn import GeneralizedRCNN, ProposalNetwork +from .retinanet import RetinaNet +from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head + + +__all__ = list(globals().keys()) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/build.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/build.py new file mode 100644 index 0000000000000000000000000000000000000000..3427215746c9a146bd902f22ea9b26d121c36b27 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/build.py @@ -0,0 +1,25 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch + +from detectron2.utils.logger import _log_api_usage +from detectron2.utils.registry import Registry + +META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip +META_ARCH_REGISTRY.__doc__ = """ +Registry for meta-architectures, i.e. the whole model. + +The registered object will be called with `obj(cfg)` +and expected to return a `nn.Module` object. +""" + + +def build_model(cfg): + """ + Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. + Note that it does not load any weights from ``cfg``. + """ + meta_arch = cfg.MODEL.META_ARCHITECTURE + model = META_ARCH_REGISTRY.get(meta_arch)(cfg) + model.to(torch.device(cfg.MODEL.DEVICE)) + _log_api_usage("modeling.meta_arch." + meta_arch) + return model diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/panoptic_fpn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/panoptic_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..7d18f3d64ac0f4e988ead4870f8b0f65f894a36b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/panoptic_fpn.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +from typing import Dict, Tuple +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.structures import ImageList + +from ..postprocessing import detector_postprocess, sem_seg_postprocess +from .build import META_ARCH_REGISTRY +from .rcnn import GeneralizedRCNN +from .semantic_seg import build_sem_seg_head + +__all__ = ["PanopticFPN"] + + +@META_ARCH_REGISTRY.register() +class PanopticFPN(GeneralizedRCNN): + """ + Implement the paper :paper:`PanopticFPN`. + """ + + @configurable + def __init__( + self, + *, + sem_seg_head: nn.Module, + combine_overlap_thresh: float = 0.5, + combine_stuff_area_thresh: float = 4096, + combine_instances_score_thresh: float = 0.5, + **kwargs + ): + """ + NOTE: this interface is experimental. 
+ + Args: + sem_seg_head: a module for the semantic segmentation head. + combine_overlap_thresh: combine masks into one instances if + they have enough overlap + combine_stuff_area_thresh: ignore stuff areas smaller than this threshold + combine_instances_score_thresh: ignore instances whose score is + smaller than this threshold + + Other arguments are the same as :class:`GeneralizedRCNN`. + """ + super().__init__(**kwargs) + self.sem_seg_head = sem_seg_head + # options when combining instance & semantic outputs + self.combine_overlap_thresh = combine_overlap_thresh + self.combine_stuff_area_thresh = combine_stuff_area_thresh + self.combine_instances_score_thresh = combine_instances_score_thresh + + @classmethod + def from_config(cls, cfg): + ret = super().from_config(cfg) + ret.update( + { + "combine_overlap_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH, + "combine_stuff_area_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT, + "combine_instances_score_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH, # noqa + } + ) + ret["sem_seg_head"] = build_sem_seg_head(cfg, ret["backbone"].output_shape()) + logger = logging.getLogger(__name__) + if not cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED: + logger.warning( + "PANOPTIC_FPN.COMBINED.ENABLED is no longer used. " + " model.inference(do_postprocess=) should be used to toggle postprocessing." + ) + if cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT != 1.0: + w = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT + logger.warning( + "PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT should be replaced by weights on each ROI head." + ) + + def update_weight(x): + if isinstance(x, dict): + return {k: v * w for k, v in x.items()} + else: + return x * w + + roi_heads = ret["roi_heads"] + roi_heads.box_predictor.loss_weight = update_weight(roi_heads.box_predictor.loss_weight) + roi_heads.mask_head.loss_weight = update_weight(roi_heads.mask_head.loss_weight) + return ret + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "instances": Instances + * "sem_seg": semantic segmentation ground truth. + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + each dict has the results for one image. The dict contains the following keys: + + * "instances": see :meth:`GeneralizedRCNN.forward` for its format. + * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. + * "panoptic_seg": See the return value of + :func:`combine_semantic_and_instance_outputs` for its format. 
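+
+        Example of the expected input structure (a single-image sketch with random
+        tensors; real inputs are produced by :class:`DatasetMapper`, and instance
+        masks are omitted here for brevity)::
+
+            import torch
+            from detectron2.structures import Boxes, Instances
+
+            inst = Instances((480, 640))
+            inst.gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 100.0, 80.0]]))
+            inst.gt_classes = torch.tensor([3])
+            batched_inputs = [{
+                "image": torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8),
+                "instances": inst,
+                "sem_seg": torch.zeros(480, 640, dtype=torch.long),
+                "height": 480, "width": 640,
+            }]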
+ """ + if not self.training: + return self.inference(batched_inputs) + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + + assert "sem_seg" in batched_inputs[0] + gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] + gt_sem_seg = ImageList.from_tensors( + gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value + ).tensor + sem_seg_results, sem_seg_losses = self.sem_seg_head(features, gt_sem_seg) + + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + detector_results, detector_losses = self.roi_heads( + images, features, proposals, gt_instances + ) + + losses = sem_seg_losses + losses.update(proposal_losses) + losses.update(detector_losses) + return losses + + def inference( + self, batched_inputs: Tuple[Dict[str, torch.Tensor]], do_postprocess: bool = True + ): + """ + Run inference on the given inputs. + + Args: + batched_inputs (list[dict]): same as in :meth:`forward` + do_postprocess (bool): whether to apply post-processing on the outputs. + + Returns: + When do_postprocess=True, see docs in :meth:`forward`. + Otherwise, returns a (list[Instances], list[Tensor]) that contains + the raw detector outputs, and raw semantic segmentation outputs. + """ + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + sem_seg_results, sem_seg_losses = self.sem_seg_head(features, None) + proposals, _ = self.proposal_generator(images, features, None) + detector_results, _ = self.roi_heads(images, features, proposals, None) + + if do_postprocess: + processed_results = [] + for sem_seg_result, detector_result, input_per_image, image_size in zip( + sem_seg_results, detector_results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) + detector_r = detector_postprocess(detector_result, height, width) + + processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) + + panoptic_r = combine_semantic_and_instance_outputs( + detector_r, + sem_seg_r.argmax(dim=0), + self.combine_overlap_thresh, + self.combine_stuff_area_thresh, + self.combine_instances_score_thresh, + ) + processed_results[-1]["panoptic_seg"] = panoptic_r + return processed_results + else: + return detector_results, sem_seg_results + + +def combine_semantic_and_instance_outputs( + instance_results, + semantic_results, + overlap_threshold, + stuff_area_thresh, + instances_score_thresh, +): + """ + Implement a simple combining logic following + "combine_semantic_and_instance_predictions.py" in panopticapi + to produce panoptic segmentation outputs. + + Args: + instance_results: output of :func:`detector_postprocess`. + semantic_results: an (H, W) tensor, each element is the contiguous semantic + category id + + Returns: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". 
+ """ + panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32) + + # sort instance outputs by scores + sorted_inds = torch.argsort(-instance_results.scores) + + current_segment_id = 0 + segments_info = [] + + instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device) + + # Add instances one-by-one, check for overlaps with existing ones + for inst_id in sorted_inds: + score = instance_results.scores[inst_id].item() + if score < instances_score_thresh: + break + mask = instance_masks[inst_id] # H,W + mask_area = mask.sum().item() + + if mask_area == 0: + continue + + intersect = (mask > 0) & (panoptic_seg > 0) + intersect_area = intersect.sum().item() + + if intersect_area * 1.0 / mask_area > overlap_threshold: + continue + + if intersect_area > 0: + mask = mask & (panoptic_seg == 0) + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": True, + "score": score, + "category_id": instance_results.pred_classes[inst_id].item(), + "instance_id": inst_id.item(), + } + ) + + # Add semantic results to remaining empty areas + semantic_labels = torch.unique(semantic_results).cpu().tolist() + for semantic_label in semantic_labels: + if semantic_label == 0: # 0 is a special "thing" class + continue + mask = (semantic_results == semantic_label) & (panoptic_seg == 0) + mask_area = mask.sum().item() + if mask_area < stuff_area_thresh: + continue + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": False, + "category_id": semantic_label, + "area": mask_area, + } + ) + + return panoptic_seg, segments_info diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/rcnn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..83e5c187cf420a417dc067f2fcf92d48fb05a666 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/rcnn.py @@ -0,0 +1,327 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +from typing import Dict, List, Optional, Tuple +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.data.detection_utils import convert_image_to_rgb +from detectron2.structures import ImageList, Instances +from detectron2.utils.events import get_event_storage +from detectron2.utils.logger import log_first_n + +from ..backbone import Backbone, build_backbone +from ..postprocessing import detector_postprocess +from ..proposal_generator import build_proposal_generator +from ..roi_heads import build_roi_heads +from .build import META_ARCH_REGISTRY + +__all__ = ["GeneralizedRCNN", "ProposalNetwork"] + + +@META_ARCH_REGISTRY.register() +class GeneralizedRCNN(nn.Module): + """ + Generalized R-CNN. Any models that contains the following three components: + 1. Per-image feature extraction (aka backbone) + 2. Region proposal generation + 3. 
Per-region feature extraction and prediction + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + proposal_generator: nn.Module, + roi_heads: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + input_format: Optional[str] = None, + vis_period: int = 0, + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + proposal_generator: a module that generates proposals using backbone features + roi_heads: a ROI head that performs per-region computation + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + input_format: describe the meaning of channels of input. Needed by visualization + vis_period: the period to run visualization. Set to 0 to disable. + """ + super().__init__() + self.backbone = backbone + self.proposal_generator = proposal_generator + self.roi_heads = roi_heads + + self.input_format = input_format + self.vis_period = vis_period + if vis_period > 0: + assert input_format is not None, "input_format is required for visualization!" + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + assert ( + self.pixel_mean.shape == self.pixel_std.shape + ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + return { + "backbone": backbone, + "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), + "roi_heads": build_roi_heads(cfg, backbone.output_shape()), + "input_format": cfg.INPUT.FORMAT, + "vis_period": cfg.VIS_PERIOD, + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def visualize_training(self, batched_inputs, proposals): + """ + A function used to visualize images and proposals. It shows ground truth + bounding boxes on the original image and up to 20 top-scoring predicted + object proposals on the original image. Users can implement different + visualization functions for different models. + + Args: + batched_inputs (list): a list that contains input to the model. + proposals (list): a list that contains predicted proposals. Both + batched_inputs and proposals should have the same length. + """ + from detectron2.utils.visualizer import Visualizer + + storage = get_event_storage() + max_vis_prop = 20 + + for input, prop in zip(batched_inputs, proposals): + img = input["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) + anno_img = v_gt.get_image() + box_size = min(len(prop.proposal_boxes), max_vis_prop) + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances( + boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() + ) + prop_img = v_pred.get_image() + vis_img = np.concatenate((anno_img, prop_img), axis=1) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = "Left: GT bounding boxes; Right: Predicted proposals" + storage.put_image(vis_name, vis_img) + break # only visualize one image in a batch + + def forward(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. 
+ For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances (optional): groundtruth :class:`Instances` + * proposals (optional): :class:`Instances`, precomputed proposals. + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "instances" whose value is a :class:`Instances`. + The :class:`Instances` object has the following keys: + "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" + """ + if not self.training: + return self.inference(batched_inputs) + + images = self.preprocess_image(batched_inputs) + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + + features = self.backbone(images.tensor) + + if self.proposal_generator is not None: + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + proposal_losses = {} + + _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) + if self.vis_period > 0: + storage = get_event_storage() + if storage.iter % self.vis_period == 0: + self.visualize_training(batched_inputs, proposals) + + losses = {} + losses.update(detector_losses) + losses.update(proposal_losses) + return losses + + def inference( + self, + batched_inputs: Tuple[Dict[str, torch.Tensor]], + detected_instances: Optional[List[Instances]] = None, + do_postprocess: bool = True, + ): + """ + Run inference on the given inputs. + + Args: + batched_inputs (list[dict]): same as in :meth:`forward` + detected_instances (None or list[Instances]): if not None, it + contains an `Instances` object per image. The `Instances` + object contains "pred_boxes" and "pred_classes" which are + known boxes in the image. + The inference will then skip the detection of bounding boxes, + and only predict other per-ROI outputs. + do_postprocess (bool): whether to apply post-processing on the outputs. + + Returns: + When do_postprocess=True, same as in :meth:`forward`. + Otherwise, a list[Instances] containing raw network outputs. + """ + assert not self.training + + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + + if detected_instances is None: + if self.proposal_generator is not None: + proposals, _ = self.proposal_generator(images, features, None) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + + results, _ = self.roi_heads(images, features, proposals, None) + else: + detected_instances = [x.to(self.device) for x in detected_instances] + results = self.roi_heads.forward_with_given_boxes(features, detected_instances) + + if do_postprocess: + assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess." + return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) + else: + return results + + def preprocess_image(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]): + """ + Normalize, pad and batch the input images. 
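+
+        A standalone sketch of the same arithmetic (the mean/std values and image
+        sizes below are made up for illustration; the real values come from the
+        config and the dataloader)::
+
+            import torch
+            from detectron2.structures import ImageList
+
+            pixel_mean = torch.tensor([103.53, 116.28, 123.675]).view(-1, 1, 1)
+            pixel_std = torch.tensor([1.0, 1.0, 1.0]).view(-1, 1, 1)
+            imgs = [torch.rand(3, 480, 640) * 255, torch.rand(3, 500, 600) * 255]
+            imgs = [(x - pixel_mean) / pixel_std for x in imgs]
+            batched = ImageList.from_tensors(imgs, size_divisibility=32)
+            # batched.tensor: (2, 3, 512, 640) -- both images padded to a common
+            # size that is a multiple of size_divisibility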
+ """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + return images + + @staticmethod + def _postprocess(instances, batched_inputs: Tuple[Dict[str, torch.Tensor]], image_sizes): + """ + Rescale the output instances to the target size. + """ + # note: private function; subject to changes + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + instances, batched_inputs, image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + +@META_ARCH_REGISTRY.register() +class ProposalNetwork(nn.Module): + """ + A meta architecture that only predicts object proposals. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + proposal_generator: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + proposal_generator: a module that generates proposals using backbone features + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + """ + super().__init__() + self.backbone = backbone + self.proposal_generator = proposal_generator + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + return { + "backbone": backbone, + "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + Same as in :class:`GeneralizedRCNN.forward` + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "proposals" whose value is a + :class:`Instances` with keys "proposal_boxes" and "objectness_logits". + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + features = self.backbone(images.tensor) + + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + elif "targets" in batched_inputs[0]: + log_first_n( + logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 + ) + gt_instances = [x["targets"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + # In training, the proposals are not useful at all but we generate them anyway. + # This makes RPN-only models about 5% slower. 
+ if self.training: + return proposal_losses + + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + proposals, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"proposals": r}) + return processed_results diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/retinanet.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..20cff9e3ca581e0f49d87882de803edbf2acb8d0 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/retinanet.py @@ -0,0 +1,609 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +import numpy as np +from typing import Dict, List, Tuple +import torch +from fvcore.nn import sigmoid_focal_loss_jit +from torch import Tensor, nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data.detection_utils import convert_image_to_rgb +from detectron2.layers import ShapeSpec, batched_nms, cat, get_norm, nonzero_tuple +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage + +from ..anchor_generator import build_anchor_generator +from ..backbone import Backbone, build_backbone +from ..box_regression import Box2BoxTransform, _dense_box_regression_loss +from ..matcher import Matcher +from ..postprocessing import detector_postprocess +from .build import META_ARCH_REGISTRY + +__all__ = ["RetinaNet"] + + +logger = logging.getLogger(__name__) + + +def permute_to_N_HWA_K(tensor, K: int): + """ + Transpose/reshape a tensor from (N, (Ai x K), H, W) to (N, (HxWxAi), K) + """ + assert tensor.dim() == 4, tensor.shape + N, _, H, W = tensor.shape + tensor = tensor.view(N, -1, K, H, W) + tensor = tensor.permute(0, 3, 4, 1, 2) + tensor = tensor.reshape(N, -1, K) # Size=(N,HWA,K) + return tensor + + +@META_ARCH_REGISTRY.register() +class RetinaNet(nn.Module): + """ + Implement RetinaNet in :paper:`RetinaNet`. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + head: nn.Module, + head_in_features, + anchor_generator, + box2box_transform, + anchor_matcher, + num_classes, + focal_loss_alpha=0.25, + focal_loss_gamma=2.0, + smooth_l1_beta=0.0, + box_reg_loss_type="smooth_l1", + test_score_thresh=0.05, + test_topk_candidates=1000, + test_nms_thresh=0.5, + max_detections_per_image=100, + pixel_mean, + pixel_std, + vis_period=0, + input_format="BGR", + ): + """ + NOTE: this interface is experimental. + + Args: + backbone: a backbone module, must follow detectron2's backbone interface + head (nn.Module): a module that predicts logits and regression deltas + for each level from a list of per-level features + head_in_features (Tuple[str]): Names of the input feature maps to be used in head + anchor_generator (nn.Module): a module that creates anchors from a + list of features. Usually an instance of :class:`AnchorGenerator` + box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to + instance boxes + anchor_matcher (Matcher): label the anchors by matching them with ground truth. + num_classes (int): number of classes. Used to label background proposals. 
+ + # Loss parameters: + focal_loss_alpha (float): focal_loss_alpha + focal_loss_gamma (float): focal_loss_gamma + smooth_l1_beta (float): smooth_l1_beta + box_reg_loss_type (str): Options are "smooth_l1", "giou" + + # Inference parameters: + test_score_thresh (float): Inference cls score threshold, only anchors with + score > INFERENCE_TH are considered for inference (to improve speed) + test_topk_candidates (int): Select topk candidates before NMS + test_nms_thresh (float): Overlap threshold used for non-maximum suppression + (suppress boxes with IoU >= this threshold) + max_detections_per_image (int): + Maximum number of detections to return per image during inference + (100 is based on the limit established for the COCO dataset). + + # Input parameters + pixel_mean (Tuple[float]): + Values to be used for image normalization (BGR order). + To train on images of different number of channels, set different mean & std. + Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] + pixel_std (Tuple[float]): + When using pre-trained models in Detectron1 or any MSRA models, + std has been absorbed into its conv1 weights, so the std needs to be set 1. + Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) + vis_period (int): + The period (in terms of steps) for minibatch visualization at train time. + Set to 0 to disable. + input_format (str): Whether the model needs RGB, YUV, HSV etc. + """ + super().__init__() + + self.backbone = backbone + self.head = head + self.head_in_features = head_in_features + if len(self.backbone.output_shape()) != len(self.head_in_features): + logger.warning("[RetinaNet] Backbone produces unused features.") + + # Anchors + self.anchor_generator = anchor_generator + self.box2box_transform = box2box_transform + self.anchor_matcher = anchor_matcher + + self.num_classes = num_classes + # Loss parameters: + self.focal_loss_alpha = focal_loss_alpha + self.focal_loss_gamma = focal_loss_gamma + self.smooth_l1_beta = smooth_l1_beta + self.box_reg_loss_type = box_reg_loss_type + # Inference parameters: + self.test_score_thresh = test_score_thresh + self.test_topk_candidates = test_topk_candidates + self.test_nms_thresh = test_nms_thresh + self.max_detections_per_image = max_detections_per_image + # Vis parameters + self.vis_period = vis_period + self.input_format = input_format + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + """ + In Detectron1, loss is normalized by number of foreground samples in the batch. + When batch size is 1 per GPU, #foreground has a large variance and + using it lead to lower performance. Here we maintain an EMA of #foreground to + stabilize the normalizer. 
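+
+        Concretely, :meth:`losses` applies
+        ``loss_normalizer = m * loss_normalizer + (1 - m) * max(num_pos_anchors, 1)``
+        with ``m = loss_normalizer_momentum = 0.9`` (initialized just below).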
+ """ + self.loss_normalizer = 100 # initialize with any reasonable #fg that's not too small + self.loss_normalizer_momentum = 0.9 + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + backbone_shape = backbone.output_shape() + feature_shapes = [backbone_shape[f] for f in cfg.MODEL.RETINANET.IN_FEATURES] + head = RetinaNetHead(cfg, feature_shapes) + anchor_generator = build_anchor_generator(cfg, feature_shapes) + return { + "backbone": backbone, + "head": head, + "anchor_generator": anchor_generator, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS), + "anchor_matcher": Matcher( + cfg.MODEL.RETINANET.IOU_THRESHOLDS, + cfg.MODEL.RETINANET.IOU_LABELS, + allow_low_quality_matches=True, + ), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, + "head_in_features": cfg.MODEL.RETINANET.IN_FEATURES, + # Loss parameters: + "focal_loss_alpha": cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA, + "focal_loss_gamma": cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA, + "smooth_l1_beta": cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA, + "box_reg_loss_type": cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE, + # Inference parameters: + "test_score_thresh": cfg.MODEL.RETINANET.SCORE_THRESH_TEST, + "test_topk_candidates": cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST, + "test_nms_thresh": cfg.MODEL.RETINANET.NMS_THRESH_TEST, + "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE, + # Vis parameters + "vis_period": cfg.VIS_PERIOD, + "input_format": cfg.INPUT.FORMAT, + } + + @property + def device(self): + return self.pixel_mean.device + + def visualize_training(self, batched_inputs, results): + """ + A function used to visualize ground truth images and final network predictions. + It shows ground truth bounding boxes on the original image and up to 20 + predicted object bounding boxes on the original image. + + Args: + batched_inputs (list): a list that contains input to the model. + results (List[Instances]): a list of #images elements. + """ + from detectron2.utils.visualizer import Visualizer + + assert len(batched_inputs) == len( + results + ), "Cannot visualize inputs and results of different sizes" + storage = get_event_storage() + max_boxes = 20 + + image_index = 0 # only visualize a single image + img = batched_inputs[image_index]["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes) + anno_img = v_gt.get_image() + processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1]) + predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy() + + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes]) + prop_img = v_pred.get_image() + vis_img = np.vstack((anno_img, prop_img)) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results" + storage.put_image(vis_name, vis_img) + + def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. 
+ * instances: Instances + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + Returns: + In training, dict[str, Tensor]: mapping from a named loss to a tensor storing the + loss. Used during training only. In inference, the standard output format, described + in :doc:`/tutorials/models`. + """ + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + features = [features[f] for f in self.head_in_features] + + anchors = self.anchor_generator(features) + pred_logits, pred_anchor_deltas = self.head(features) + # Transpose the Hi*Wi*A dimension to the middle: + pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] + pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas] + + if self.training: + assert not torch.jit.is_scripting(), "Not supported" + assert "instances" in batched_inputs[0], "Instance annotations are missing in training!" + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + + gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances) + losses = self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes) + + if self.vis_period > 0: + storage = get_event_storage() + if storage.iter % self.vis_period == 0: + results = self.inference( + anchors, pred_logits, pred_anchor_deltas, images.image_sizes + ) + self.visualize_training(batched_inputs, results) + + return losses + else: + results = self.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes) + if torch.jit.is_scripting(): + return results + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + def losses(self, anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes): + """ + Args: + anchors (list[Boxes]): a list of #feature level Boxes + gt_labels, gt_boxes: see output of :meth:`RetinaNet.label_anchors`. + Their shapes are (N, R) and (N, R, 4), respectively, where R is + the total number of anchors across levels, i.e. sum(Hi x Wi x Ai) + pred_logits, pred_anchor_deltas: both are list[Tensor]. Each element in the + list corresponds to one level and has shape (N, Hi * Wi * Ai, K or 4). + Where K is the number of classes used in `pred_logits`. + + Returns: + dict[str, Tensor]: + mapping from a named loss to a scalar tensor + storing the loss. Used during training only. 
The dict keys are: + "loss_cls" and "loss_box_reg" + """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, R) + + valid_mask = gt_labels >= 0 + pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes) + num_pos_anchors = pos_mask.sum().item() + get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images) + self.loss_normalizer = self.loss_normalizer_momentum * self.loss_normalizer + ( + 1 - self.loss_normalizer_momentum + ) * max(num_pos_anchors, 1) + + # classification and regression loss + gt_labels_target = F.one_hot(gt_labels[valid_mask], num_classes=self.num_classes + 1)[ + :, :-1 + ] # no loss for the last (background) class + loss_cls = sigmoid_focal_loss_jit( + cat(pred_logits, dim=1)[valid_mask], + gt_labels_target.to(pred_logits[0].dtype), + alpha=self.focal_loss_alpha, + gamma=self.focal_loss_gamma, + reduction="sum", + ) + + loss_box_reg = _dense_box_regression_loss( + anchors, + self.box2box_transform, + pred_anchor_deltas, + gt_boxes, + pos_mask, + box_reg_loss_type=self.box_reg_loss_type, + smooth_l1_beta=self.smooth_l1_beta, + ) + + return { + "loss_cls": loss_cls / self.loss_normalizer, + "loss_box_reg": loss_box_reg / self.loss_normalizer, + } + + @torch.no_grad() + def label_anchors(self, anchors, gt_instances): + """ + Args: + anchors (list[Boxes]): A list of #feature level Boxes. + The Boxes contains anchors of this image on the specific feature level. + gt_instances (list[Instances]): a list of N `Instances`s. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. + + Returns: + list[Tensor]: List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across all feature maps (sum(Hi * Wi * A)). + Label values are in {-1, 0, ..., K}, with -1 means ignore, and K means background. + + list[Tensor]: i-th element is a Rx4 tensor, where R is the total number of anchors + across feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as foreground. + """ + anchors = Boxes.cat(anchors) # Rx4 + + gt_labels = [] + matched_gt_boxes = [] + for gt_per_image in gt_instances: + match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors) + matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix) + del match_quality_matrix + + if len(gt_per_image) > 0: + matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs] + + gt_labels_i = gt_per_image.gt_classes[matched_idxs] + # Anchors with label 0 are treated as background. + gt_labels_i[anchor_labels == 0] = self.num_classes + # Anchors with label -1 are ignored. + gt_labels_i[anchor_labels == -1] = -1 + else: + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes + + gt_labels.append(gt_labels_i) + matched_gt_boxes.append(matched_gt_boxes_i) + + return gt_labels, matched_gt_boxes + + def inference( + self, + anchors: List[Boxes], + pred_logits: List[Tensor], + pred_anchor_deltas: List[Tensor], + image_sizes: List[Tuple[int, int]], + ): + """ + Arguments: + anchors (list[Boxes]): A list of #feature level Boxes. + The Boxes contain anchors of this image on the specific feature level. + pred_logits, pred_anchor_deltas: list[Tensor], one per level. Each + has shape (N, Hi * Wi * Ai, K or 4) + image_sizes (List[(h, w)]): the input image sizes + + Returns: + results (List[Instances]): a list of #images elements. 
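+
+        Note that ``anchors`` are shared by every image in the batch; only the
+        per-image slices of ``pred_logits`` and ``pred_anchor_deltas`` are taken
+        inside the loop below.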
+ """ + results: List[Instances] = [] + for img_idx, image_size in enumerate(image_sizes): + pred_logits_per_image = [x[img_idx] for x in pred_logits] + deltas_per_image = [x[img_idx] for x in pred_anchor_deltas] + results_per_image = self.inference_single_image( + anchors, pred_logits_per_image, deltas_per_image, image_size + ) + results.append(results_per_image) + return results + + def inference_single_image( + self, + anchors: List[Boxes], + box_cls: List[Tensor], + box_delta: List[Tensor], + image_size: Tuple[int, int], + ): + """ + Single-image inference. Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Arguments: + anchors (list[Boxes]): list of #feature levels. Each entry contains + a Boxes object, which contains all the anchors in that feature level. + box_cls (list[Tensor]): list of #feature levels. Each entry contains + tensor of size (H x W x A, K) + box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. + image_size (tuple(H, W)): a tuple of the image height and width. + + Returns: + Same as `inference`, but for only one image. + """ + boxes_all = [] + scores_all = [] + class_idxs_all = [] + + # Iterate over every feature level + for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors): + # (HxWxAxK,) + predicted_prob = box_cls_i.flatten().sigmoid_() + + # Apply two filtering below to make NMS faster. + # 1. Keep boxes with confidence score higher than threshold + keep_idxs = predicted_prob > self.test_score_thresh + predicted_prob = predicted_prob[keep_idxs] + topk_idxs = nonzero_tuple(keep_idxs)[0] + + # 2. Keep top k top scoring boxes only + num_topk = min(self.test_topk_candidates, topk_idxs.size(0)) + # torch.sort is actually faster than .topk (at least on GPUs) + predicted_prob, idxs = predicted_prob.sort(descending=True) + predicted_prob = predicted_prob[:num_topk] + topk_idxs = topk_idxs[idxs[:num_topk]] + + anchor_idxs = topk_idxs // self.num_classes + classes_idxs = topk_idxs % self.num_classes + + box_reg_i = box_reg_i[anchor_idxs] + anchors_i = anchors_i[anchor_idxs] + # predict boxes + predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor) + + boxes_all.append(predicted_boxes) + scores_all.append(predicted_prob) + class_idxs_all.append(classes_idxs) + + boxes_all, scores_all, class_idxs_all = [ + cat(x) for x in [boxes_all, scores_all, class_idxs_all] + ] + keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.test_nms_thresh) + keep = keep[: self.max_detections_per_image] + + result = Instances(image_size) + result.pred_boxes = Boxes(boxes_all[keep]) + result.scores = scores_all[keep] + result.pred_classes = class_idxs_all[keep] + return result + + def preprocess_image(self, batched_inputs: Tuple[Dict[str, Tensor]]): + """ + Normalize, pad and batch the input images. + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + return images + + +class RetinaNetHead(nn.Module): + """ + The head used in RetinaNet for object classification and box regression. + It has two subnets for the two tasks, with a common structure but separate parameters. + """ + + @configurable + def __init__( + self, + *, + input_shape: List[ShapeSpec], + num_classes, + num_anchors, + conv_dims: List[int], + norm="", + prior_prob=0.01, + ): + """ + NOTE: this interface is experimental. 
+ + Args: + input_shape (List[ShapeSpec]): input shape + num_classes (int): number of classes. Used to label background proposals. + num_anchors (int): number of generated anchors + conv_dims (List[int]): dimensions for each convolution layer + norm (str or callable): + Normalization for conv layers except for the two output layers. + See :func:`detectron2.layers.get_norm` for supported types. + prior_prob (float): Prior weight for computing bias + """ + super().__init__() + + if norm == "BN" or norm == "SyncBN": + logger.warning("Shared norm does not work well for BN, SyncBN, expect poor results") + + cls_subnet = [] + bbox_subnet = [] + for in_channels, out_channels in zip( + [input_shape[0].channels] + list(conv_dims), conv_dims + ): + cls_subnet.append( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + ) + if norm: + cls_subnet.append(get_norm(norm, out_channels)) + cls_subnet.append(nn.ReLU()) + bbox_subnet.append( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + ) + if norm: + bbox_subnet.append(get_norm(norm, out_channels)) + bbox_subnet.append(nn.ReLU()) + + self.cls_subnet = nn.Sequential(*cls_subnet) + self.bbox_subnet = nn.Sequential(*bbox_subnet) + self.cls_score = nn.Conv2d( + conv_dims[-1], num_anchors * num_classes, kernel_size=3, stride=1, padding=1 + ) + self.bbox_pred = nn.Conv2d( + conv_dims[-1], num_anchors * 4, kernel_size=3, stride=1, padding=1 + ) + + # Initialization + for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]: + for layer in modules.modules(): + if isinstance(layer, nn.Conv2d): + torch.nn.init.normal_(layer.weight, mean=0, std=0.01) + torch.nn.init.constant_(layer.bias, 0) + + # Use prior in model initialization to improve stability + bias_value = -(math.log((1 - prior_prob) / prior_prob)) + torch.nn.init.constant_(self.cls_score.bias, bias_value) + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors + assert ( + len(set(num_anchors)) == 1 + ), "Using different number of anchors between levels is not currently supported!" + num_anchors = num_anchors[0] + + return { + "input_shape": input_shape, + "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, + "conv_dims": [input_shape[0].channels] * cfg.MODEL.RETINANET.NUM_CONVS, + "prior_prob": cfg.MODEL.RETINANET.PRIOR_PROB, + "norm": cfg.MODEL.RETINANET.NORM, + "num_anchors": num_anchors, + } + + def forward(self, features: List[Tensor]): + """ + Arguments: + features (list[Tensor]): FPN feature map tensors in high to low resolution. + Each tensor in the list correspond to different feature levels. + + Returns: + logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). + The tensor predicts the classification probability + at each spatial position for each of the A anchors and K object + classes. + bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). + The tensor predicts 4-vector (dx,dy,dw,dh) box + regression values for every anchor. These values are the + relative offset between the anchor and the ground truth box. 
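+
+        Example (a shape-only sketch with arbitrary sizes; the head is built here
+        with explicit keyword arguments instead of a config)::
+
+            import torch
+            from detectron2.layers import ShapeSpec
+
+            head = RetinaNetHead(
+                input_shape=[ShapeSpec(channels=256), ShapeSpec(channels=256)],
+                num_classes=80, num_anchors=9, conv_dims=[256, 256])
+            logits, bbox_reg = head([torch.rand(2, 256, 32, 32),
+                                     torch.rand(2, 256, 16, 16)])
+            # logits[0]: (2, 9 * 80, 32, 32); bbox_reg[1]: (2, 9 * 4, 16, 16)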
+ """ + logits = [] + bbox_reg = [] + for feature in features: + logits.append(self.cls_score(self.cls_subnet(feature))) + bbox_reg.append(self.bbox_pred(self.bbox_subnet(feature))) + return logits, bbox_reg diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/semantic_seg.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/semantic_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..a7ed07e3659d1fb7ffd143c30c4c3ac2e7fa523e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/meta_arch/semantic_seg.py @@ -0,0 +1,250 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Callable, Dict, Optional, Tuple, Union +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, get_norm +from detectron2.structures import ImageList +from detectron2.utils.registry import Registry + +from ..backbone import Backbone, build_backbone +from ..postprocessing import sem_seg_postprocess +from .build import META_ARCH_REGISTRY + +__all__ = ["SemanticSegmentor", "SEM_SEG_HEADS_REGISTRY", "SemSegFPNHead", "build_sem_seg_head"] + + +SEM_SEG_HEADS_REGISTRY = Registry("SEM_SEG_HEADS") +SEM_SEG_HEADS_REGISTRY.__doc__ = """ +Registry for semantic segmentation heads, which make semantic segmentation predictions +from feature maps. +""" + + +@META_ARCH_REGISTRY.register() +class SemanticSegmentor(nn.Module): + """ + Main class for semantic segmentation architectures. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + sem_seg_head: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float] + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + sem_seg_head: a module that predicts semantic segmentation from backbone features + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + """ + super().__init__() + self.backbone = backbone + self.sem_seg_head = sem_seg_head + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + sem_seg_head = build_sem_seg_head(cfg, backbone.output_shape()) + return { + "backbone": backbone, + "sem_seg_head": sem_seg_head, + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "sem_seg": semantic segmentation ground truth + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model (may be different + from input resolution), used in inference. + + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "sem_seg" whose value is a + Tensor that represents the + per-pixel segmentation prediced by the head. 
+ The prediction has shape KxHxW that represents the logits of + each class for each pixel. + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + + features = self.backbone(images.tensor) + + if "sem_seg" in batched_inputs[0]: + targets = [x["sem_seg"].to(self.device) for x in batched_inputs] + targets = ImageList.from_tensors( + targets, self.backbone.size_divisibility, self.sem_seg_head.ignore_value + ).tensor + else: + targets = None + results, losses = self.sem_seg_head(features, targets) + + if self.training: + return losses + + processed_results = [] + for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): + height = input_per_image.get("height") + width = input_per_image.get("width") + r = sem_seg_postprocess(result, image_size, height, width) + processed_results.append({"sem_seg": r}) + return processed_results + + +def build_sem_seg_head(cfg, input_shape): + """ + Build a semantic segmentation head from `cfg.MODEL.SEM_SEG_HEAD.NAME`. + """ + name = cfg.MODEL.SEM_SEG_HEAD.NAME + return SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +@SEM_SEG_HEADS_REGISTRY.register() +class SemSegFPNHead(nn.Module): + """ + A semantic segmentation head described in :paper:`PanopticFPN`. + It takes a list of FPN features as input, and applies a sequence of + 3x3 convs and upsampling to scale all of them to the stride defined by + ``common_stride``. Then these features are added and used to make final + predictions by another 1x1 conv layer. + """ + + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + num_classes: int, + conv_dims: int, + common_stride: int, + loss_weight: float = 1.0, + norm: Optional[Union[str, Callable]] = None, + ignore_value: int = -1 + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape: shapes (channels and stride) of the input features + num_classes: number of classes to predict + conv_dims: number of output channels for the intermediate conv layers. + common_stride: the common stride that all features will be upscaled to + loss_weight: loss weight + norm (str or callable): normalization for all conv layers + ignore_value: category id to be ignored during training. 
+ """ + super().__init__() + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + self.in_features = [k for k, v in input_shape] + feature_strides = [v.stride for k, v in input_shape] + feature_channels = [v.channels for k, v in input_shape] + + self.ignore_value = ignore_value + self.common_stride = common_stride + self.loss_weight = loss_weight + + self.scale_heads = [] + for in_feature, stride, channels in zip( + self.in_features, feature_strides, feature_channels + ): + head_ops = [] + head_length = max(1, int(np.log2(stride) - np.log2(self.common_stride))) + for k in range(head_length): + norm_module = get_norm(norm, conv_dims) + conv = Conv2d( + channels if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=1, + padding=1, + bias=not norm, + norm=norm_module, + activation=F.relu, + ) + weight_init.c2_msra_fill(conv) + head_ops.append(conv) + if stride != self.common_stride: + head_ops.append( + nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) + ) + self.scale_heads.append(nn.Sequential(*head_ops)) + self.add_module(in_feature, self.scale_heads[-1]) + self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) + weight_init.c2_msra_fill(self.predictor) + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + return { + "input_shape": { + k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + }, + "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + "conv_dims": cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM, + "common_stride": cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE, + "norm": cfg.MODEL.SEM_SEG_HEAD.NORM, + "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, + } + + def forward(self, features, targets=None): + """ + Returns: + In training, returns (None, dict of losses) + In inference, returns (CxHxW logits, {}) + """ + x = self.layers(features) + if self.training: + return None, self.losses(x, targets) + else: + x = F.interpolate( + x, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + return x, {} + + def layers(self, features): + for i, f in enumerate(self.in_features): + if i == 0: + x = self.scale_heads[i](features[f]) + else: + x = x + self.scale_heads[i](features[f]) + x = self.predictor(x) + return x + + def losses(self, predictions, targets): + predictions = predictions.float() # https://github.com/pytorch/pytorch/issues/48163 + predictions = F.interpolate( + predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + loss = F.cross_entropy( + predictions, targets, reduction="mean", ignore_index=self.ignore_value + ) + losses = {"loss_sem_seg": loss * self.loss_weight} + return losses diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/mmdet_wrapper.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/mmdet_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..7905fbcba481ebd176c7c4f3c323fd982706e7c1 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/mmdet_wrapper.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- + +import itertools +import logging +import numpy as np +from collections import OrderedDict +from collections.abc import Mapping +from typing import Dict, List, Optional, Tuple, Union +import torch +from omegaconf import DictConfig, OmegaConf +from torch import Tensor, nn + +from detectron2.layers import ShapeSpec +from detectron2.structures import BitMasks, Boxes, 
ImageList, Instances +from detectron2.utils.events import get_event_storage + +from .backbone import Backbone + +logger = logging.getLogger(__name__) + + +def _to_container(cfg): + """ + mmdet will assert the type of dict/list. + So convert omegaconf objects to dict/list. + """ + if isinstance(cfg, DictConfig): + cfg = OmegaConf.to_container(cfg, resolve=True) + from mmcv.utils import ConfigDict + + return ConfigDict(cfg) + + +class MMDetBackbone(Backbone): + """ + Wrapper of mmdetection backbones to use in detectron2. + + mmdet backbones produce list/tuple of tensors, while detectron2 backbones + produce a dict of tensors. This class wraps the given backbone to produce + output in detectron2's convention, so it can be used in place of detectron2 + backbones. + """ + + def __init__( + self, + backbone: Union[nn.Module, Mapping], + neck: Union[nn.Module, Mapping, None] = None, + *, + pretrained_backbone: Optional[str] = None, + output_shapes: List[ShapeSpec], + output_names: Optional[List[str]] = None, + ): + """ + Args: + backbone: either a backbone module or a mmdet config dict that defines a + backbone. The backbone takes a 4D image tensor and returns a + sequence of tensors. + neck: either a backbone module or a mmdet config dict that defines a + neck. The neck takes outputs of backbone and returns a + sequence of tensors. If None, no neck is used. + pretrained_backbone: defines the backbone weights that can be loaded by + mmdet, such as "torchvision://resnet50". + output_shapes: shape for every output of the backbone (or neck, if given). + stride and channels are often needed. + output_names: names for every output of the backbone (or neck, if given). + By default, will use "out0", "out1", ... + """ + super().__init__() + if isinstance(backbone, Mapping): + from mmdet.models import build_backbone + + backbone = build_backbone(_to_container(backbone)) + self.backbone = backbone + + if isinstance(neck, Mapping): + from mmdet.models import build_neck + + neck = build_neck(_to_container(neck)) + self.neck = neck + + # It's confusing that backbone weights are given as a separate argument, + # but "neck" weights, if any, are part of neck itself. This is the interface + # of mmdet so we follow it. Reference: + # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py + logger.info(f"Initializing mmdet backbone weights: {pretrained_backbone} ...") + self.backbone.init_weights(pretrained_backbone) + # train() in mmdet modules is non-trivial, and has to be explicitly + # called. Reference: + # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/backbones/resnet.py + self.backbone.train() + if self.neck is not None: + logger.info("Initializing mmdet neck weights ...") + if isinstance(self.neck, nn.Sequential): + for m in self.neck: + m.init_weights() + else: + self.neck.init_weights() + self.neck.train() + + self._output_shapes = output_shapes + if not output_names: + output_names = [f"out{i}" for i in range(len(output_shapes))] + self._output_names = output_names + + def forward(self, x) -> Dict[str, Tensor]: + outs = self.backbone(x) + if self.neck is not None: + outs = self.neck(outs) + assert isinstance( + outs, (list, tuple) + ), "mmdet backbone should return a list/tuple of tensors!" 
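+        # Validate the number of outputs, then repackage them into detectron2's dict
+        # convention, keyed by self._output_names ("out0", "out1", ... by default).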
+ if len(outs) != len(self._output_shapes): + raise ValueError( + "Length of output_shapes does not match outputs from the mmdet backbone: " + f"{len(outs)} != {len(self._output_shapes)}" + ) + return {k: v for k, v in zip(self._output_names, outs)} + + def output_shape(self) -> Dict[str, ShapeSpec]: + return {k: v for k, v in zip(self._output_names, self._output_shapes)} + + +class MMDetDetector(nn.Module): + """ + Wrapper of a mmdetection detector model, for detection and instance segmentation. + Input/output formats of this class follow detectron2's convention, so a + mmdetection model can be trained and evaluated in detectron2. + """ + + def __init__( + self, + detector: Union[nn.Module, Mapping], + *, + # Default is 32 regardless of model: + # https://github.com/open-mmlab/mmdetection/tree/master/configs/_base_/datasets + size_divisibility=32, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + ): + """ + Args: + detector: a mmdet detector, or a mmdet config dict that defines a detector. + size_divisibility: pad input images to multiple of this number + pixel_mean: per-channel mean to normalize input image + pixel_std: per-channel stddev to normalize input image + """ + super().__init__() + if isinstance(detector, Mapping): + from mmdet.models import build_detector + + detector = build_detector(_to_container(detector)) + self.detector = detector + self.size_divisibility = size_divisibility + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + assert ( + self.pixel_mean.shape == self.pixel_std.shape + ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" + + def forward(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]): + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, size_divisibility=self.size_divisibility).tensor + metas = [] + rescale = {"height" in x for x in batched_inputs} + if len(rescale) != 1: + raise ValueError("Some inputs have original height/width, but some don't!") + rescale = list(rescale)[0] + output_shapes = [] + for input in batched_inputs: + meta = {} + c, h, w = input["image"].shape + meta["img_shape"] = meta["ori_shape"] = (h, w, c) + if rescale: + scale_factor = np.sqrt(h * w / (input["height"] * input["width"])) + ori_shape = (input["height"], input["width"]) + output_shapes.append(ori_shape) + meta["ori_shape"] = ori_shape + (c,) + else: + scale_factor = 1.0 + output_shapes.append((h, w)) + meta["scale_factor"] = scale_factor + meta["flip"] = False + padh, padw = images.shape[-2:] + meta["pad_shape"] = (padh, padw, c) + metas.append(meta) + + if self.training: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + if gt_instances[0].has("gt_masks"): + from mmdet.core import PolygonMasks as mm_PolygonMasks, BitmapMasks as mm_BitMasks + + def convert_mask(m, shape): + # mmdet mask format + if isinstance(m, BitMasks): + return mm_BitMasks(m.tensor.cpu().numpy(), shape[0], shape[1]) + else: + return mm_PolygonMasks(m.polygons, shape[0], shape[1]) + + gt_masks = [convert_mask(x.gt_masks, x.image_size) for x in gt_instances] + else: + gt_masks = None + losses_and_metrics = self.detector.forward_train( + images, + metas, + [x.gt_boxes.tensor for x in gt_instances], + [x.gt_classes for x in gt_instances], + gt_masks=gt_masks, + ) + return _parse_losses(losses_and_metrics) + else: + 
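+            # Inference path: run mmdet's simple_test and convert each raw result into
+            # detectron2 `Instances` using the recorded output shapes.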
results = self.detector.simple_test(images, metas, rescale=rescale) + results = [ + {"instances": _convert_mmdet_result(r, shape)} + for r, shape in zip(results, output_shapes) + ] + return results + + @property + def device(self): + return self.pixel_mean.device + + +# Reference: show_result() in +# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py +def _convert_mmdet_result(result, shape: Tuple[int, int]) -> Instances: + if isinstance(result, tuple): + bbox_result, segm_result = result + if isinstance(segm_result, tuple): + segm_result = segm_result[0] + else: + bbox_result, segm_result = result, None + + bboxes = torch.from_numpy(np.vstack(bbox_result)) # Nx5 + bboxes, scores = bboxes[:, :4], bboxes[:, -1] + labels = [ + torch.full((bbox.shape[0],), i, dtype=torch.int32) for i, bbox in enumerate(bbox_result) + ] + labels = torch.cat(labels) + inst = Instances(shape) + inst.pred_boxes = Boxes(bboxes) + inst.scores = scores + inst.pred_classes = labels + + if segm_result is not None and len(labels) > 0: + segm_result = list(itertools.chain(*segm_result)) + segm_result = [torch.from_numpy(x) if isinstance(x, np.ndarray) else x for x in segm_result] + segm_result = torch.stack(segm_result, dim=0) + inst.pred_masks = segm_result + return inst + + +# reference: https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py +def _parse_losses(losses: Dict[str, Tensor]) -> Dict[str, Tensor]: + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError(f"{loss_name} is not a tensor or list of tensors") + + if "loss" not in loss_name: + # put metrics to storage; don't return them + storage = get_event_storage() + value = log_vars.pop(loss_name).cpu().item() + storage.put_scalar(loss_name, value) + return log_vars diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/poolers.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/poolers.py new file mode 100644 index 0000000000000000000000000000000000000000..e7ef14873abe48d3bcf9f3059149bc228fcf1d28 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/poolers.py @@ -0,0 +1,258 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List +import torch +from torch import nn +from torchvision.ops import RoIPool + +from detectron2.layers import ROIAlign, ROIAlignRotated, cat, nonzero_tuple +from detectron2.structures import Boxes + +""" +To export ROIPooler to torchscript, in this file, variables that should be annotated with +`Union[List[Boxes], List[RotatedBoxes]]` are only annotated with `List[Boxes]`. + +TODO: Correct these annotations when torchscript support `Union`. +https://github.com/pytorch/pytorch/issues/41412 +""" + +__all__ = ["ROIPooler"] + + +def assign_boxes_to_levels( + box_lists: List[Boxes], + min_level: int, + max_level: int, + canonical_box_size: int, + canonical_level: int, +): + """ + Map each box in `box_lists` to a feature map level index and return the assignment + vector. + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, + where N is the number of images in the batch. + min_level (int): Smallest feature map level index. The input is considered index 0, + the output of stage 1 is index 1, and so. 
+ max_level (int): Largest feature map level index. + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). + canonical_level (int): The feature map level index on which a canonically-sized box + should be placed. + + Returns: + A tensor of length M, where M is the total number of boxes aggregated over all + N batch images. The memory layout corresponds to the concatenation of boxes + from all images. Each element is the feature map index, as an offset from + `self.min_level`, for the corresponding box (so value i means the box is at + `self.min_level + i`). + """ + box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) + # Eqn.(1) in FPN paper + level_assignments = torch.floor( + canonical_level + torch.log2(box_sizes / canonical_box_size + 1e-8) + ) + # clamp level to (min, max), in case the box size is too large or too small + # for the available feature maps + level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) + return level_assignments.to(torch.int64) - min_level + + +def _fmt_box_list(box_tensor, batch_index: int): + repeated_index = torch.full_like( + box_tensor[:, :1], batch_index, dtype=box_tensor.dtype, device=box_tensor.device + ) + return cat((repeated_index, box_tensor), dim=1) + + +def convert_boxes_to_pooler_format(box_lists: List[Boxes]): + """ + Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops + (see description under Returns). + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + + Returns: + When input is list[Boxes]: + A tensor of shape (M, 5), where M is the total number of boxes aggregated over all + N batch images. + The 5 columns are (batch index, x0, y0, x1, y1), where batch index + is the index in [0, N) identifying which batch image the box with corners at + (x0, y0, x1, y1) comes from. + When input is list[RotatedBoxes]: + A tensor of shape (M, 6), where M is the total number of boxes aggregated over all + N batch images. + The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), + where batch index is the index in [0, N) identifying which batch image the + rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. + """ + pooler_fmt_boxes = cat( + [_fmt_box_list(box_list.tensor, i) for i, box_list in enumerate(box_lists)], dim=0 + ) + + return pooler_fmt_boxes + + +class ROIPooler(nn.Module): + """ + Region of interest feature map pooler that supports pooling from one or more + feature maps. + """ + + def __init__( + self, + output_size, + scales, + sampling_ratio, + pooler_type, + canonical_box_size=224, + canonical_level=4, + ): + """ + Args: + output_size (int, tuple[int] or list[int]): output size of the pooled region, + e.g., 14 x 14. If tuple or list is given, the length must be 2. + scales (list[float]): The scale for each low-level pooling op relative to + the input image. For a feature map with stride s relative to the input + image, scale is defined as 1/s. The stride must be power of 2. + When there are multiple scales, they must form a pyramid, i.e. they must be + a monotically decreasing geometric sequence with a factor of 1/2. + sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. + pooler_type (string): Name of the type of pooling operation that should be applied. + For instance, "ROIPool" or "ROIAlignV2". + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). 
The default + is heuristically defined as 224 pixels in the FPN paper (based on ImageNet + pre-training). + canonical_level (int): The feature map level index from which a canonically-sized box + should be placed. The default is defined as level 4 (stride=16) in the FPN paper, + i.e., a box of size 224x224 will be placed on the feature with stride=16. + The box placement for all boxes will be determined from their sizes w.r.t + canonical_box_size. For example, a box whose area is 4x that of a canonical box + should be used to pool features from feature level ``canonical_level+1``. + + Note that the actual input feature maps given to this module may not have + sufficiently many levels for the input boxes. If the boxes are too large or too + small for the input feature maps, the closest level will be used. + """ + super().__init__() + + if isinstance(output_size, int): + output_size = (output_size, output_size) + assert len(output_size) == 2 + assert isinstance(output_size[0], int) and isinstance(output_size[1], int) + self.output_size = output_size + + if pooler_type == "ROIAlign": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False + ) + for scale in scales + ) + elif pooler_type == "ROIAlignV2": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True + ) + for scale in scales + ) + elif pooler_type == "ROIPool": + self.level_poolers = nn.ModuleList( + RoIPool(output_size, spatial_scale=scale) for scale in scales + ) + elif pooler_type == "ROIAlignRotated": + self.level_poolers = nn.ModuleList( + ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) + for scale in scales + ) + else: + raise ValueError("Unknown pooler type: {}".format(pooler_type)) + + # Map scale (defined as 1 / stride) to its feature map level under the + # assumption that stride is a power of 2. + min_level = -(math.log2(scales[0])) + max_level = -(math.log2(scales[-1])) + assert math.isclose(min_level, int(min_level)) and math.isclose( + max_level, int(max_level) + ), "Featuremap stride is not power of 2!" + self.min_level = int(min_level) + self.max_level = int(max_level) + assert ( + len(scales) == self.max_level - self.min_level + 1 + ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" + assert 0 <= self.min_level and self.min_level <= self.max_level + self.canonical_level = canonical_level + assert canonical_box_size > 0 + self.canonical_box_size = canonical_box_size + + def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]): + """ + Args: + x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those + used to construct this module. + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + The box coordinates are defined on the original image and + will be scaled by the `scales` argument of :class:`ROIPooler`. + + Returns: + Tensor: + A tensor of shape (M, C, output_size, output_size) where M is the total number of + boxes aggregated over all N batch images and C is the number of channels in `x`. 
+ """ + num_level_assignments = len(self.level_poolers) + + assert isinstance(x, list) and isinstance( + box_lists, list + ), "Arguments to pooler must be lists" + assert ( + len(x) == num_level_assignments + ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( + num_level_assignments, len(x) + ) + + assert len(box_lists) == x[0].size( + 0 + ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( + x[0].size(0), len(box_lists) + ) + if len(box_lists) == 0: + return torch.zeros( + (0, x[0].shape[1]) + self.output_size, device=x[0].device, dtype=x[0].dtype + ) + + pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) + + if num_level_assignments == 1: + return self.level_poolers[0](x[0], pooler_fmt_boxes) + + level_assignments = assign_boxes_to_levels( + box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level + ) + + num_boxes = pooler_fmt_boxes.size(0) + num_channels = x[0].shape[1] + if len(self.output_size) == 1: + output_size = self.output_size[0] + + dtype, device = x[0].dtype, x[0].device + output = torch.zeros( + (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device + ) + else: + output_size = self.output_size[0] + output_size1 = self.output_size[1] + dtype, device = x[0].dtype, x[0].device + output = torch.zeros( + (num_boxes, num_channels, output_size, output_size1), dtype=dtype, device=device + ) + + for level, pooler in enumerate(self.level_poolers): + inds = nonzero_tuple(level_assignments == level)[0] + pooler_fmt_boxes_level = pooler_fmt_boxes[inds] + # Use index_put_ instead of advance indexing, to avoid pytorch/issues/49852 + output.index_put_((inds,), pooler(x[level], pooler_fmt_boxes_level)) + + return output diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/postprocessing.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/postprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..f42e77c52f15869dcfb426d12befa8837f404021 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/postprocessing.py @@ -0,0 +1,101 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch +from torch.nn import functional as F + +from detectron2.layers import paste_masks_in_image +from detectron2.structures import Instances +from detectron2.utils.memory import retry_if_cuda_oom + + +# perhaps should rename to "resize_instance" +def detector_postprocess( + results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5 +): + """ + Resize the output instances. + The input images are often resized when entering an object detector. + As a result, we often need the outputs of the detector in a different + resolution from its inputs. + + This function will resize the raw outputs of an R-CNN detector + to produce outputs according to the desired output resolution. + + Args: + results (Instances): the raw outputs from the detector. + `results.image_size` contains the input image resolution the detector sees. + This object might be modified in-place. + output_height, output_width: the desired output resolution. + + Returns: + Instances: the resized output from the model, based on the output resolution + """ + # Change to 'if is_tracing' after PT1.7 + if isinstance(output_height, torch.Tensor): + # Converts integer tensors to float temporaries to ensure true + # division is performed when computing scale_x and scale_y. 
+ output_width_tmp = output_width.float() + output_height_tmp = output_height.float() + new_size = torch.stack([output_height, output_width]) + else: + new_size = (output_height, output_width) + output_width_tmp = output_width + output_height_tmp = output_height + + scale_x, scale_y = ( + output_width_tmp / results.image_size[1], + output_height_tmp / results.image_size[0], + ) + results = Instances(new_size, **results.get_fields()) + + if results.has("pred_boxes"): + output_boxes = results.pred_boxes + elif results.has("proposal_boxes"): + output_boxes = results.proposal_boxes + else: + output_boxes = None + assert output_boxes is not None, "Predictions must contain boxes!" + + output_boxes.scale(scale_x, scale_y) + output_boxes.clip(results.image_size) + + results = results[output_boxes.nonempty()] + + if results.has("pred_masks"): + results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( + results.pred_masks[:, 0, :, :], # N, 1, M, M + results.pred_boxes, + results.image_size, + threshold=mask_threshold, + ) + + if results.has("pred_keypoints"): + results.pred_keypoints[:, :, 0] *= scale_x + results.pred_keypoints[:, :, 1] *= scale_y + + return results + + +def sem_seg_postprocess(result, img_size, output_height, output_width): + """ + Return semantic segmentation predictions in the original resolution. + + The input images are often resized when entering semantic segmentor. Moreover, in same + cases, they also padded inside segmentor to be divisible by maximum network stride. + As a result, we often need the predictions of the segmentor in a different + resolution from its inputs. + + Args: + result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), + where C is the number of classes, and H, W are the height and width of the prediction. + img_size (tuple): image size that segmentor is taking as input. + output_height, output_width: the desired output resolution. + + Returns: + semantic segmentation prediction (Tensor): A tensor of the shape + (C, output_height, output_width) that contains per-pixel soft predictions. + """ + result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) + result = F.interpolate( + result, size=(output_height, output_width), mode="bilinear", align_corners=False + )[0] + return result diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..79c9acf84fd69e527deb89f1a54375fd71552ed8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator +from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN + +__all__ = list(globals().keys()) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/build.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/build.py new file mode 100644 index 0000000000000000000000000000000000000000..34eb12d00d94ff905b796e75e2c4c5845257c8e9 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/build.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
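+# This module builds a proposal generator (e.g. RPN) by name from PROPOSAL_GENERATOR_REGISTRY.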
+from detectron2.utils.registry import Registry + +PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") +PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ +Registry for proposal generator, which produces object proposals from feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + +from . import rpn, rrpn # noqa F401 isort:skip + + +def build_proposal_generator(cfg, input_shape): + """ + Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. + The name can be "PrecomputedProposals" to use no proposal generator. + """ + name = cfg.MODEL.PROPOSAL_GENERATOR.NAME + if name == "PrecomputedProposals": + return None + + return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/proposal_utils.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/proposal_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9c104367417d23756a6072de48891009e6766775 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/proposal_utils.py @@ -0,0 +1,182 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +from typing import List, Tuple +import torch + +from detectron2.layers import batched_nms, cat +from detectron2.structures import Boxes, Instances +from detectron2.utils.env import TORCH_VERSION + +logger = logging.getLogger(__name__) + + +def _is_tracing(): + if torch.jit.is_scripting(): + # https://github.com/pytorch/pytorch/issues/47379 + return False + else: + return TORCH_VERSION >= (1, 7) and torch.jit.is_tracing() + + +def find_top_rpn_proposals( + proposals: List[torch.Tensor], + pred_objectness_logits: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + nms_thresh: float, + pre_nms_topk: int, + post_nms_topk: int, + min_box_size: float, + training: bool, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps for each image. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). + image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is per + feature map. + post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. + min_box_size (float): minimum proposal box side length in pixels (absolute units + wrt input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + list[Instances]: list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. + """ + num_images = len(image_sizes) + device = proposals[0].device + + # 1. 
Select top-k anchor for every level and every image + topk_scores = [] # #lvl Tensor, each of shape N x topk + topk_proposals = [] + level_ids = [] # #lvl Tensor, each of shape (topk,) + batch_idx = torch.arange(num_images, device=device) + for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)): + Hi_Wi_A = logits_i.shape[1] + if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing + num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk) + else: + num_proposals_i = min(Hi_Wi_A, pre_nms_topk) + + # sort is faster than topk: https://github.com/pytorch/pytorch/issues/22812 + # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) + logits_i, idx = logits_i.sort(descending=True, dim=1) + topk_scores_i = logits_i.narrow(1, 0, num_proposals_i) + topk_idx = idx.narrow(1, 0, num_proposals_i) + + # each is N x topk + topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4 + + topk_proposals.append(topk_proposals_i) + topk_scores.append(topk_scores_i) + level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) + + # 2. Concat all levels together + topk_scores = cat(topk_scores, dim=1) + topk_proposals = cat(topk_proposals, dim=1) + level_ids = cat(level_ids, dim=0) + + # 3. For each image, run a per-level NMS, and choose topk results. + results: List[Instances] = [] + for n, image_size in enumerate(image_sizes): + boxes = Boxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + lvl = level_ids + + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) + if not valid_mask.all(): + if training: + raise FloatingPointError( + "Predicted boxes or scores contain Inf/NaN. Training has diverged." + ) + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + lvl = lvl[valid_mask] + boxes.clip(image_size) + + # filter empty boxes + keep = boxes.nonempty(threshold=min_box_size) + if _is_tracing() or keep.sum().item() != len(boxes): + boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep] + + keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh) + # In Detectron1, there was different behavior during training vs. testing. + # (https://github.com/facebookresearch/Detectron/issues/459) + # During training, topk is over the proposals from *all* images in the training batch. + # During testing, it is over the proposals for each image separately. + # As a result, the training behavior becomes batch-dependent, + # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. + # This bug is addressed in Detectron2 to make the behavior independent of batch size. + keep = keep[:post_nms_topk] # keep is already sorted + + res = Instances(image_size) + res.proposal_boxes = boxes[keep] + res.objectness_logits = scores_per_img[keep] + results.append(res) + return results + + +def add_ground_truth_to_proposals(gt_boxes, proposals): + """ + Call `add_ground_truth_to_proposals_single_image` for all images. + + Args: + gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes + representing the gound-truth for image i. + proposals (list[Instances]): list of N elements. Element i is a Instances + representing the proposals for image i. + + Returns: + list[Instances]: list of N Instances. Each is the proposals for the image, + with field "proposal_boxes" and "objectness_logits". 
+ """ + assert gt_boxes is not None + + assert len(proposals) == len(gt_boxes) + if len(proposals) == 0: + return proposals + + return [ + add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) + for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) + ] + + +def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): + """ + Augment `proposals` with ground-truth boxes from `gt_boxes`. + + Args: + Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals + per image. + + Returns: + Same as `add_ground_truth_to_proposals`, but for only one image. + """ + device = proposals.objectness_logits.device + # Assign all ground-truth boxes an objectness logit corresponding to + # P(object) = sigmoid(logit) =~ 1. + gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) + gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) + + # Concatenating gt_boxes with proposals requires them to have the same fields + gt_proposal = Instances(proposals.image_size) + gt_proposal.proposal_boxes = gt_boxes + gt_proposal.objectness_logits = gt_logits + new_proposals = Instances.cat([proposals, gt_proposal]) + + return new_proposals diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/rpn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..99cd536d2f9880d2049390c45f73eb22335e1b82 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/rpn.py @@ -0,0 +1,533 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import Dict, List, Optional, Tuple, Union +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, cat +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage +from detectron2.utils.memory import retry_if_cuda_oom +from detectron2.utils.registry import Registry + +from ..anchor_generator import build_anchor_generator +from ..box_regression import Box2BoxTransform, _dense_box_regression_loss +from ..matcher import Matcher +from ..sampling import subsample_labels +from .build import PROPOSAL_GENERATOR_REGISTRY +from .proposal_utils import find_top_rpn_proposals + +RPN_HEAD_REGISTRY = Registry("RPN_HEAD") +RPN_HEAD_REGISTRY.__doc__ = """ +Registry for RPN heads, which take feature maps and perform +objectness classification and bounding box regression for anchors. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + L: number of feature maps per image on which RPN is run + A: number of cell anchors (must be the same for all feature maps) + Hi, Wi: height and width of the i-th feature map + B: size of the box parameterization + +Naming convention: + + objectness: refers to the binary classification of an anchor as object vs. not object. + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`), or 5d for rotated boxes. + + pred_objectness_logits: predicted objectness scores in [-inf, +inf]; use + sigmoid(pred_objectness_logits) to estimate P(object). 
+ + gt_labels: ground-truth binary classification labels for objectness + + pred_anchor_deltas: predicted box2box transform deltas + + gt_anchor_deltas: ground-truth box2box transform deltas +""" + + +def build_rpn_head(cfg, input_shape): + """ + Build an RPN head defined by `cfg.MODEL.RPN.HEAD_NAME`. + """ + name = cfg.MODEL.RPN.HEAD_NAME + return RPN_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +@RPN_HEAD_REGISTRY.register() +class StandardRPNHead(nn.Module): + """ + Standard RPN classification and regression heads described in :paper:`Faster R-CNN`. + Uses a 3x3 conv to produce a shared hidden state from which one 1x1 conv predicts + objectness logits for each anchor and a second 1x1 conv predicts bounding-box deltas + specifying how to deform each anchor into an object proposal. + """ + + @configurable + def __init__( + self, *, in_channels: int, num_anchors: int, box_dim: int = 4, conv_dims: List[int] = (-1,) + ): + """ + NOTE: this interface is experimental. + + Args: + in_channels (int): number of input feature channels. When using multiple + input features, they must have the same number of channels. + num_anchors (int): number of anchors to predict for *each spatial position* + on the feature map. The total number of anchors for each + feature map will be `num_anchors * H * W`. + box_dim (int): dimension of a box, which is also the number of box regression + predictions to make for each anchor. An axis aligned box has + box_dim=4, while a rotated box has box_dim=5. + conv_dims (list[int]): a list of integers representing the output channels + of N conv layers. Set it to -1 to use the same number of output channels + as input channels. + """ + super().__init__() + cur_channels = in_channels + # Keeping the old variable names and structure for backwards compatiblity. + # Otherwise the old checkpoints will fail to load. + if len(conv_dims) == 1: + out_channels = cur_channels if conv_dims[0] == -1 else conv_dims[0] + # 3x3 conv for the hidden representation + self.conv = self._get_rpn_conv(cur_channels, out_channels) + cur_channels = out_channels + else: + self.conv = nn.Sequential() + for k, conv_dim in enumerate(conv_dims): + out_channels = cur_channels if conv_dim == -1 else conv_dim + if out_channels <= 0: + raise ValueError( + f"Conv output channels should be greater than 0. Got {out_channels}" + ) + conv = self._get_rpn_conv(cur_channels, out_channels) + self.conv.add_module(f"conv{k}", conv) + cur_channels = out_channels + # 1x1 conv for predicting objectness logits + self.objectness_logits = nn.Conv2d(cur_channels, num_anchors, kernel_size=1, stride=1) + # 1x1 conv for predicting box2box transform deltas + self.anchor_deltas = nn.Conv2d(cur_channels, num_anchors * box_dim, kernel_size=1, stride=1) + + # Keeping the order of weights initialization same for backwards compatiblility. + for layer in self.modules(): + if isinstance(layer, nn.Conv2d): + nn.init.normal_(layer.weight, std=0.01) + nn.init.constant_(layer.bias, 0) + + def _get_rpn_conv(self, in_channels, out_channels): + return Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + activation=nn.ReLU(), + ) + + @classmethod + def from_config(cls, cfg, input_shape): + # Standard RPN is shared across levels: + in_channels = [s.channels for s in input_shape] + assert len(set(in_channels)) == 1, "Each level must have the same channel!" 
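+        # A single shared head is built, so every input feature map must supply the same
+        # channel count.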
+ in_channels = in_channels[0] + + # RPNHead should take the same input as anchor generator + # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. + anchor_generator = build_anchor_generator(cfg, input_shape) + num_anchors = anchor_generator.num_anchors + box_dim = anchor_generator.box_dim + assert ( + len(set(num_anchors)) == 1 + ), "Each level must have the same number of anchors per spatial position" + return { + "in_channels": in_channels, + "num_anchors": num_anchors[0], + "box_dim": box_dim, + "conv_dims": cfg.MODEL.RPN.CONV_DIMS, + } + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of feature maps + + Returns: + list[Tensor]: A list of L elements. + Element i is a tensor of shape (N, A, Hi, Wi) representing + the predicted objectness logits for all anchors. A is the number of cell anchors. + list[Tensor]: A list of L elements. Element i is a tensor of shape + (N, A*box_dim, Hi, Wi) representing the predicted "deltas" used to transform anchors + to proposals. + """ + pred_objectness_logits = [] + pred_anchor_deltas = [] + for x in features: + t = self.conv(x) + pred_objectness_logits.append(self.objectness_logits(t)) + pred_anchor_deltas.append(self.anchor_deltas(t)) + return pred_objectness_logits, pred_anchor_deltas + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RPN(nn.Module): + """ + Region Proposal Network, introduced by :paper:`Faster R-CNN`. + """ + + @configurable + def __init__( + self, + *, + in_features: List[str], + head: nn.Module, + anchor_generator: nn.Module, + anchor_matcher: Matcher, + box2box_transform: Box2BoxTransform, + batch_size_per_image: int, + positive_fraction: float, + pre_nms_topk: Tuple[float, float], + post_nms_topk: Tuple[float, float], + nms_thresh: float = 0.7, + min_box_size: float = 0.0, + anchor_boundary_thresh: float = -1.0, + loss_weight: Union[float, Dict[str, float]] = 1.0, + box_reg_loss_type: str = "smooth_l1", + smooth_l1_beta: float = 0.0, + ): + """ + NOTE: this interface is experimental. + + Args: + in_features (list[str]): list of names of input features to use + head (nn.Module): a module that predicts logits and regression deltas + for each level from a list of per-level features + anchor_generator (nn.Module): a module that creates anchors from a + list of features. Usually an instance of :class:`AnchorGenerator` + anchor_matcher (Matcher): label the anchors by matching them with ground truth. + box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to + instance boxes + batch_size_per_image (int): number of anchors per image to sample for training + positive_fraction (float): fraction of foreground anchors to sample for training + pre_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select before NMS, in + training and testing. + post_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select after NMS, in + training and testing. + nms_thresh (float): NMS threshold used to de-duplicate the predicted proposals + min_box_size (float): remove proposal boxes with any side smaller than this threshold, + in the unit of input image pixels + anchor_boundary_thresh (float): legacy option + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all rpn losses together, or a dict of individual weightings. 
Valid dict keys are: + "loss_rpn_cls" - applied to classification loss + "loss_rpn_loc" - applied to box regression loss + box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou". + smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to + use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1" + """ + super().__init__() + self.in_features = in_features + self.rpn_head = head + self.anchor_generator = anchor_generator + self.anchor_matcher = anchor_matcher + self.box2box_transform = box2box_transform + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + # Map from self.training state to train/test settings + self.pre_nms_topk = {True: pre_nms_topk[0], False: pre_nms_topk[1]} + self.post_nms_topk = {True: post_nms_topk[0], False: post_nms_topk[1]} + self.nms_thresh = nms_thresh + self.min_box_size = float(min_box_size) + self.anchor_boundary_thresh = anchor_boundary_thresh + if isinstance(loss_weight, float): + loss_weight = {"loss_rpn_cls": loss_weight, "loss_rpn_loc": loss_weight} + self.loss_weight = loss_weight + self.box_reg_loss_type = box_reg_loss_type + self.smooth_l1_beta = smooth_l1_beta + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + in_features = cfg.MODEL.RPN.IN_FEATURES + ret = { + "in_features": in_features, + "min_box_size": cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE, + "nms_thresh": cfg.MODEL.RPN.NMS_THRESH, + "batch_size_per_image": cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.RPN.POSITIVE_FRACTION, + "loss_weight": { + "loss_rpn_cls": cfg.MODEL.RPN.LOSS_WEIGHT, + "loss_rpn_loc": cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT, + }, + "anchor_boundary_thresh": cfg.MODEL.RPN.BOUNDARY_THRESH, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS), + "box_reg_loss_type": cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE, + "smooth_l1_beta": cfg.MODEL.RPN.SMOOTH_L1_BETA, + } + + ret["pre_nms_topk"] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, cfg.MODEL.RPN.PRE_NMS_TOPK_TEST) + ret["post_nms_topk"] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, cfg.MODEL.RPN.POST_NMS_TOPK_TEST) + + ret["anchor_generator"] = build_anchor_generator(cfg, [input_shape[f] for f in in_features]) + ret["anchor_matcher"] = Matcher( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True + ) + ret["head"] = build_rpn_head(cfg, [input_shape[f] for f in in_features]) + return ret + + def _subsample_labels(self, label): + """ + Randomly sample a subset of positive and negative examples, and overwrite + the label vector to the ignore value (-1) for all elements that are not + included in the sample. + + Args: + labels (Tensor): a vector of -1, 0, 1. Will be modified in-place and returned. + """ + pos_idx, neg_idx = subsample_labels( + label, self.batch_size_per_image, self.positive_fraction, 0 + ) + # Fill with the ignore label (-1), then set positive and negative labels + label.fill_(-1) + label.scatter_(0, pos_idx, 1) + label.scatter_(0, neg_idx, 0) + return label + + @torch.jit.unused + @torch.no_grad() + def label_and_sample_anchors( + self, anchors: List[Boxes], gt_instances: List[Instances] + ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + """ + Args: + anchors (list[Boxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. 
i-th element is a vector of labels whose length is + the total number of anchors across all feature maps R = sum(Hi * Wi * A). + Label values are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative + class; 1 = positive class. + list[Tensor]: + i-th element is a Rx4 tensor. The values are the matched gt boxes for each + anchor. Values are undefined for those anchors not labeled as 1. + """ + anchors = Boxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + image_sizes = [x.image_size for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes): + """ + image_size_i: (h, w) for the i-th image + gt_boxes_i: ground-truth boxes for i-th image + """ + + match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + del match_quality_matrix + + if self.anchor_boundary_thresh >= 0: + # Discard anchors that go out of the boundaries of the image + # NOTE: This is legacy functionality that is turned off by default in Detectron2 + anchors_inside_image = anchors.inside_box(image_size_i, self.anchor_boundary_thresh) + gt_labels_i[~anchors_inside_image] = -1 + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.jit.unused + def losses( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + gt_labels: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + gt_boxes: List[torch.Tensor], + ) -> Dict[str, torch.Tensor]: + """ + Return the losses from a set of RPN predictions and their associated ground-truth. + + Args: + anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each + has shape (Hi*Wi*A, B), where B is box dimension (4 or 5). + pred_objectness_logits (list[Tensor]): A list of L elements. + Element i is a tensor of shape (N, Hi*Wi*A) representing + the predicted objectness logits for all anchors. + gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape + (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors + to proposals. + gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + + Returns: + dict[loss name -> loss value]: A dict mapping from loss name to loss value. + Loss names are: `loss_rpn_cls` for objectness classification and + `loss_rpn_loc` for proposal localization. 
+ """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, sum(Hi*Wi*Ai)) + + # Log the number of positive/negative anchors per-image that's used in training + pos_mask = gt_labels == 1 + num_pos_anchors = pos_mask.sum().item() + num_neg_anchors = (gt_labels == 0).sum().item() + storage = get_event_storage() + storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images) + storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images) + + localization_loss = _dense_box_regression_loss( + anchors, + self.box2box_transform, + pred_anchor_deltas, + gt_boxes, + pos_mask, + box_reg_loss_type=self.box_reg_loss_type, + smooth_l1_beta=self.smooth_l1_beta, + ) + + valid_mask = gt_labels >= 0 + objectness_loss = F.binary_cross_entropy_with_logits( + cat(pred_objectness_logits, dim=1)[valid_mask], + gt_labels[valid_mask].to(torch.float32), + reduction="sum", + ) + normalizer = self.batch_size_per_image * num_images + losses = { + "loss_rpn_cls": objectness_loss / normalizer, + # The original Faster R-CNN paper uses a slightly different normalizer + # for loc loss. But it doesn't matter in practice + "loss_rpn_loc": localization_loss / normalizer, + } + losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + return losses + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + gt_instances: Optional[List[Instances]] = None, + ): + """ + Args: + images (ImageList): input images of length `N` + features (dict[str, Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + gt_instances (list[Instances], optional): a length `N` list of `Instances`s. + Each `Instances` stores ground-truth instances for the corresponding image. + + Returns: + proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits" + loss: dict[Tensor] or None + """ + features = [features[f] for f in self.in_features] + anchors = self.anchor_generator(features) + + pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) + # Transpose the Hi*Wi*A dimension to the middle: + pred_objectness_logits = [ + # (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) + score.permute(0, 2, 3, 1).flatten(1) + for score in pred_objectness_logits + ] + pred_anchor_deltas = [ + # (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) -> (N, Hi*Wi*A, B) + x.view(x.shape[0], -1, self.anchor_generator.box_dim, x.shape[-2], x.shape[-1]) + .permute(0, 3, 4, 1, 2) + .flatten(1, -2) + for x in pred_anchor_deltas + ] + + if self.training: + assert gt_instances is not None, "RPN requires gt_instances in training!" + gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) + losses = self.losses( + anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes + ) + else: + losses = {} + proposals = self.predict_proposals( + anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes + ) + return proposals, losses + + def predict_proposals( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + ): + """ + Decode all the predicted box regression deltas to proposals. Find the top proposals + by applying NMS and removing boxes that are too small. + + Returns: + proposals (list[Instances]): list of N Instances. 
The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. + """ + # The proposals are treated as fixed for joint training with roi heads. + # This approach ignores the derivative w.r.t. the proposal boxes’ coordinates that + # are also network responses. + with torch.no_grad(): + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + self.pre_nms_topk[self.training], + self.post_nms_topk[self.training], + self.min_box_size, + self.training, + ) + + def _decode_proposals(self, anchors: List[Boxes], pred_anchor_deltas: List[torch.Tensor]): + """ + Transform anchors into proposals by applying the predicted anchor deltas. + + Returns: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape + (N, Hi*Wi*A, B) + """ + N = pred_anchor_deltas[0].shape[0] + proposals = [] + # For each feature map + for anchors_i, pred_anchor_deltas_i in zip(anchors, pred_anchor_deltas): + B = anchors_i.tensor.size(1) + pred_anchor_deltas_i = pred_anchor_deltas_i.reshape(-1, B) + # Expand anchors to shape (N*Hi*Wi*A, B) + anchors_i = anchors_i.tensor.unsqueeze(0).expand(N, -1, -1).reshape(-1, B) + proposals_i = self.box2box_transform.apply_deltas(pred_anchor_deltas_i, anchors_i) + # Append feature map proposals with shape (N, Hi*Wi*A, B) + proposals.append(proposals_i.view(N, -1, B)) + return proposals diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/rrpn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/rrpn.py new file mode 100644 index 0000000000000000000000000000000000000000..6ee4d8fd70430c5242cc02a1df8400493ffd75b7 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/proposal_generator/rrpn.py @@ -0,0 +1,203 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import logging +from typing import Dict, List +import torch + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, batched_nms_rotated, cat +from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.memory import retry_if_cuda_oom + +from ..box_regression import Box2BoxTransformRotated +from .build import PROPOSAL_GENERATOR_REGISTRY +from .rpn import RPN + +logger = logging.getLogger(__name__) + + +def find_top_rrpn_proposals( + proposals, + pred_objectness_logits, + image_sizes, + nms_thresh, + pre_nms_topk, + post_nms_topk, + min_box_size, + training, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps if `training` is True, + otherwise, returns the highest `post_nms_topk` scoring proposals for each + feature map. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). + image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RRPN is run on multiple feature maps (as in FPN) this number is per + feature map. 
+ post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. + When RRPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. + min_box_size(float): minimum proposal box side length in pixels (absolute units wrt + input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + proposals (list[Instances]): list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i. + """ + num_images = len(image_sizes) + device = proposals[0].device + + # 1. Select top-k anchor for every level and every image + topk_scores = [] # #lvl Tensor, each of shape N x topk + topk_proposals = [] + level_ids = [] # #lvl Tensor, each of shape (topk,) + batch_idx = torch.arange(num_images, device=device) + for level_id, proposals_i, logits_i in zip( + itertools.count(), proposals, pred_objectness_logits + ): + Hi_Wi_A = logits_i.shape[1] + num_proposals_i = min(pre_nms_topk, Hi_Wi_A) + + # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) + # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) + logits_i, idx = logits_i.sort(descending=True, dim=1) + topk_scores_i = logits_i[batch_idx, :num_proposals_i] + topk_idx = idx[batch_idx, :num_proposals_i] + + # each is N x topk + topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 5 + + topk_proposals.append(topk_proposals_i) + topk_scores.append(topk_scores_i) + level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) + + # 2. Concat all levels together + topk_scores = cat(topk_scores, dim=1) + topk_proposals = cat(topk_proposals, dim=1) + level_ids = cat(level_ids, dim=0) + + # 3. For each image, run a per-level NMS, and choose topk results. + results = [] + for n, image_size in enumerate(image_sizes): + boxes = RotatedBoxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + boxes.clip(image_size) + + # filter empty boxes + keep = boxes.nonempty(threshold=min_box_size) + lvl = level_ids + if keep.sum().item() != len(boxes): + boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], level_ids[keep]) + + keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh) + # In Detectron1, there was different behavior during training vs. testing. + # (https://github.com/facebookresearch/Detectron/issues/459) + # During training, topk is over the proposals from *all* images in the training batch. + # During testing, it is over the proposals for each image separately. + # As a result, the training behavior becomes batch-dependent, + # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. + # This bug is addressed in Detectron2 to make the behavior independent of batch size. + keep = keep[:post_nms_topk] + + res = Instances(image_size) + res.proposal_boxes = boxes[keep] + res.objectness_logits = scores_per_img[keep] + results.append(res) + return results + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RRPN(RPN): + """ + Rotated Region Proposal Network described in :paper:`RRPN`. 
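# --- Hedged illustration (toy values, not upstream code): the 5-parameter box layout RRPN
# --- works with instead of (x1, y1, x2, y2). Each row is (cx, cy, w, h, angle in degrees),
# --- so anchors, deltas, and proposals all carry box dimension B = 5 rather than B = 4.
import torch

rotated_proposals = torch.tensor([
    [120.0, 80.0, 60.0, 20.0, 30.0],    # box centered at (120, 80), rotated 30 degrees
    [200.0, 150.0, 40.0, 40.0, -45.0],  # square rotated -45 degrees
])
assert rotated_proposals.shape[1] == 5   # box_dim B = 5 for rotated boxes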
+ """ + + @configurable + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if self.anchor_boundary_thresh >= 0: + raise NotImplementedError( + "anchor_boundary_thresh is a legacy option not implemented for RRPN." + ) + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + ret = super().from_config(cfg, input_shape) + ret["box2box_transform"] = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) + return ret + + @torch.no_grad() + def label_and_sample_anchors(self, anchors: List[RotatedBoxes], gt_instances: List[Instances]): + """ + Args: + anchors (list[RotatedBoxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across feature maps. Label values are in {-1, 0, 1}, + with meanings: -1 = ignore; 0 = negative class; 1 = positive class. + list[Tensor]: + i-th element is a Nx5 tensor, where N is the total number of anchors across + feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as 1. + """ + anchors = RotatedBoxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for gt_boxes_i in gt_boxes: + """ + gt_boxes_i: ground-truth boxes for i-th image + """ + match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.no_grad() + def predict_proposals(self, anchors, pred_objectness_logits, pred_anchor_deltas, image_sizes): + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rrpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + self.pre_nms_topk[self.training], + self.post_nms_topk[self.training], + self.min_box_size, + self.training, + ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..35b98746c1e2510dfcfebccbed4c72babb61925b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead +from .keypoint_head import ( + ROI_KEYPOINT_HEAD_REGISTRY, + build_keypoint_head, + BaseKeypointRCNNHead, + KRCNNConvDeconvUpsampleHead, +) +from .mask_head import ( + ROI_MASK_HEAD_REGISTRY, + build_mask_head, + BaseMaskRCNNHead, + MaskRCNNConvUpsampleHead, +) +from .roi_heads import ( + ROI_HEADS_REGISTRY, + ROIHeads, + Res5ROIHeads, + StandardROIHeads, + build_roi_heads, + select_foreground_proposals, +) +from .rotated_fast_rcnn import RROIHeads +from .fast_rcnn import FastRCNNOutputLayers + +from . import cascade_rcnn # isort:skip + +__all__ = list(globals().keys()) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/box_head.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/box_head.py new file mode 100644 index 0000000000000000000000000000000000000000..5d0370b0400d9268f13c905e4096a84ce42e9bfd --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/box_head.py @@ -0,0 +1,118 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, get_norm +from detectron2.utils.registry import Registry + +__all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] + +ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") +ROI_BOX_HEAD_REGISTRY.__doc__ = """ +Registry for box heads, which make box predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_BOX_HEAD_REGISTRY.register() +class FastRCNNConvFCHead(nn.Sequential): + """ + A head with several 3x3 conv layers (each followed by norm & relu) and then + several fc layers (each followed by relu). + """ + + @configurable + def __init__( + self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature. + conv_dims (list[int]): the output dimensions of the conv layers + fc_dims (list[int]): the output dimensions of the fc layers + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. 
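# --- Rough plain-PyTorch sketch of the layer stack FastRCNNConvFCHead builds (assumed
# --- config: one 256-channel conv, one 1024-dim fc, no norm, 7x7 pooled regions). The real
# --- class wires the same pieces through detectron2's Conv2d wrapper on an nn.Sequential.
import torch
from torch import nn

box_head_sketch = nn.Sequential(
    nn.Conv2d(256, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(256 * 7 * 7, 1024),   # 7x7 is the usual ROI pooler resolution
    nn.ReLU(),
)
features = torch.randn(8, 256, 7, 7)       # 8 pooled regions
print(box_head_sketch(features).shape)     # torch.Size([8, 1024])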
+ """ + super().__init__() + assert len(conv_dims) + len(fc_dims) > 0 + + self._output_size = (input_shape.channels, input_shape.height, input_shape.width) + + self.conv_norm_relus = [] + for k, conv_dim in enumerate(conv_dims): + conv = Conv2d( + self._output_size[0], + conv_dim, + kernel_size=3, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=nn.ReLU(), + ) + self.add_module("conv{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) + + self.fcs = [] + for k, fc_dim in enumerate(fc_dims): + if k == 0: + self.add_module("flatten", nn.Flatten()) + fc = nn.Linear(int(np.prod(self._output_size)), fc_dim) + self.add_module("fc{}".format(k + 1), fc) + self.add_module("fc_relu{}".format(k + 1), nn.ReLU()) + self.fcs.append(fc) + self._output_size = fc_dim + + for layer in self.conv_norm_relus: + weight_init.c2_msra_fill(layer) + for layer in self.fcs: + weight_init.c2_xavier_fill(layer) + + @classmethod + def from_config(cls, cfg, input_shape): + num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV + conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM + num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC + fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM + return { + "input_shape": input_shape, + "conv_dims": [conv_dim] * num_conv, + "fc_dims": [fc_dim] * num_fc, + "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, + } + + def forward(self, x): + for layer in self: + x = layer(x) + return x + + @property + @torch.jit.unused + def output_shape(self): + """ + Returns: + ShapeSpec: the output feature shape + """ + o = self._output_size + if isinstance(o, int): + return ShapeSpec(channels=o) + else: + return ShapeSpec(channels=o[0], height=o[1], width=o[2]) + + +def build_box_head(cfg, input_shape): + """ + Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_BOX_HEAD.NAME + return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/cascade_rcnn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/cascade_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..4f6bfab22ed9c30a98f27b849a7fa3e210ba9cf2 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/cascade_rcnn.py @@ -0,0 +1,298 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch +from torch import nn +from torch.autograd.function import Function + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec +from detectron2.structures import Boxes, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransform +from ..matcher import Matcher +from ..poolers import ROIPooler +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + + +class _ScaleGradient(Function): + @staticmethod + def forward(ctx, input, scale): + ctx.scale = scale + return input + + @staticmethod + def backward(ctx, grad_output): + return grad_output * ctx.scale, None + + +@ROI_HEADS_REGISTRY.register() +class CascadeROIHeads(StandardROIHeads): + """ + Implement :paper:`Cascade R-CNN`. 
+ """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_heads: List[nn.Module], + box_predictors: List[nn.Module], + proposal_matchers: List[Matcher], + **kwargs, + ): + """ + NOTE: this interface is experimental. + + Args: + box_pooler (ROIPooler): pooler that extracts region features from given boxes + box_heads (list[nn.Module]): box head for each cascade stage + box_predictors (list[nn.Module]): box predictor for each cascade stage + proposal_matchers (list[Matcher]): matcher with different IoU thresholds to + match boxes with ground truth for each stage. The first matcher matches + RPN proposals with ground truth, the other matchers use boxes predicted + by the previous stage as proposals and match them with ground truth. + """ + assert "proposal_matcher" not in kwargs, ( + "CascadeROIHeads takes 'proposal_matchers=' for each stage instead " + "of one 'proposal_matcher='." + ) + # The first matcher matches RPN proposals with ground truth, done in the base class + kwargs["proposal_matcher"] = proposal_matchers[0] + num_stages = self.num_cascade_stages = len(box_heads) + box_heads = nn.ModuleList(box_heads) + box_predictors = nn.ModuleList(box_predictors) + assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!" + assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!" + super().__init__( + box_in_features=box_in_features, + box_pooler=box_pooler, + box_head=box_heads, + box_predictor=box_predictors, + **kwargs, + ) + self.proposal_matchers = proposal_matchers + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret.pop("proposal_matcher") + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS + cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS + assert len(cascade_bbox_reg_weights) == len(cascade_ious) + assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \ + "CascadeROIHeads only support class-agnostic regression now!" 
+ assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0] + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + pooled_shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + + box_heads, box_predictors, proposal_matchers = [], [], [] + for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights): + box_head = build_box_head(cfg, pooled_shape) + box_heads.append(box_head) + box_predictors.append( + FastRCNNOutputLayers( + cfg, + box_head.output_shape, + box2box_transform=Box2BoxTransform(weights=bbox_reg_weights), + ) + ) + proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False)) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_heads": box_heads, + "box_predictors": box_predictors, + "proposal_matchers": proposal_matchers, + } + + def forward(self, images, features, proposals, targets=None): + del images + if self.training: + proposals = self.label_and_sample_proposals(proposals, targets) + + if self.training: + # Need targets to box head + losses = self._forward_box(features, proposals, targets) + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def _forward_box(self, features, proposals, targets=None): + """ + Args: + features, targets: the same as in + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + """ + features = [features[f] for f in self.box_in_features] + head_outputs = [] # (predictor, predictions, proposals) + prev_pred_boxes = None + image_sizes = [x.image_size for x in proposals] + for k in range(self.num_cascade_stages): + if k > 0: + # The output boxes of the previous stage are used to create the input + # proposals of the next stage. + proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes) + if self.training: + proposals = self._match_and_label_boxes(proposals, k, targets) + predictions = self._run_stage(features, proposals, k) + prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals) + head_outputs.append((self.box_predictor[k], predictions, proposals)) + + if self.training: + losses = {} + storage = get_event_storage() + for stage, (predictor, predictions, proposals) in enumerate(head_outputs): + with storage.name_scope("stage{}".format(stage)): + stage_losses = predictor.losses(predictions, proposals) + losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()}) + return losses + else: + # Each is a list[Tensor] of length #image. 
Each tensor is Ri x (K+1) + scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs] + + # Average the scores across heads + scores = [ + sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages) + for scores_per_image in zip(*scores_per_stage) + ] + # Use the boxes of the last head + predictor, predictions, proposals = head_outputs[-1] + boxes = predictor.predict_boxes(predictions, proposals) + pred_instances, _ = fast_rcnn_inference( + boxes, + scores, + image_sizes, + predictor.test_score_thresh, + predictor.test_nms_thresh, + predictor.test_topk_per_image, + ) + return pred_instances + + @torch.no_grad() + def _match_and_label_boxes(self, proposals, stage, targets): + """ + Match proposals with groundtruth using the matcher at the given stage. + Label the proposals as foreground or background based on the match. + + Args: + proposals (list[Instances]): One Instances for each image, with + the field "proposal_boxes". + stage (int): the current stage + targets (list[Instances]): the ground truth instances + + Returns: + list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes" + """ + num_fg_samples, num_bg_samples = [], [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + # proposal_labels are 0 or 1 + matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix) + if len(targets_per_image) > 0: + gt_classes = targets_per_image.gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[proposal_labels == 0] = self.num_classes + gt_boxes = targets_per_image.gt_boxes[matched_idxs] + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + gt_boxes = Boxes( + targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)) + ) + proposals_per_image.gt_classes = gt_classes + proposals_per_image.gt_boxes = gt_boxes + + num_fg_samples.append((proposal_labels == 1).sum().item()) + num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1]) + + # Log the number of fg/bg samples in each stage + storage = get_event_storage() + storage.put_scalar( + "stage{}/roi_head/num_fg_samples".format(stage), + sum(num_fg_samples) / len(num_fg_samples), + ) + storage.put_scalar( + "stage{}/roi_head/num_bg_samples".format(stage), + sum(num_bg_samples) / len(num_bg_samples), + ) + return proposals + + def _run_stage(self, features, proposals, stage): + """ + Args: + features (list[Tensor]): #lvl input features to ROIHeads + proposals (list[Instances]): #image Instances, with the field "proposal_boxes" + stage (int): the current stage + + Returns: + Same output as `FastRCNNOutputLayers.forward()`. + """ + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + # The original implementation averages the losses among heads, + # but scale up the parameter gradients of the heads. + # This is equivalent to adding the losses among heads, + # but scale down the gradients on features. 
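# --- Standalone check (toy tensor, not part of the module) of what the _ScaleGradient trick
# --- applied just below does: the forward pass is the identity, but gradients flowing back
# --- into the shared box features are divided by the number of cascade stages.
import torch

num_stages = 3
x = torch.ones(4, requires_grad=True)

class ScaleGrad(torch.autograd.Function):
    @staticmethod
    def forward(ctx, inp, scale):
        ctx.scale = scale
        return inp

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output * ctx.scale, None

y = ScaleGrad.apply(x, 1.0 / num_stages)
y.sum().backward()
print(x.grad)   # tensor([0.3333, 0.3333, 0.3333, 0.3333])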
+ box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages) + box_features = self.box_head[stage](box_features) + return self.box_predictor[stage](box_features) + + def _create_proposals_from_boxes(self, boxes, image_sizes): + """ + Args: + boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4 + image_sizes (list[tuple]): list of image shapes in (h, w) + + Returns: + list[Instances]: per-image proposals with the given boxes. + """ + # Just like RPN, the proposals should not have gradients + boxes = [Boxes(b.detach()) for b in boxes] + proposals = [] + for boxes_per_image, image_size in zip(boxes, image_sizes): + boxes_per_image.clip(image_size) + if self.training: + # do not filter empty boxes at inference time, + # because the scores from each stage need to be aligned and added later + boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] + prop = Instances(image_size) + prop.proposal_boxes = boxes_per_image + proposals.append(prop) + return proposals diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/fast_rcnn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..348f6a09782a9d686f91f28eefe1d8d5b6df939d --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/fast_rcnn.py @@ -0,0 +1,622 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +from typing import Dict, List, Tuple, Union +import torch +from fvcore.nn import giou_loss, smooth_l1_loss +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, batched_nms, cat, cross_entropy, nonzero_tuple +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.structures import Boxes, Instances +from detectron2.utils.events import get_event_storage + +__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"] + + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). + + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. + + pred_proposal_deltas: predicted box2box transform deltas for transforming proposals + to detection box predictions. + + gt_proposal_deltas: ground-truth box2box transform deltas +""" + + +def fast_rcnn_inference( + boxes: List[torch.Tensor], + scores: List[torch.Tensor], + image_shapes: List[Tuple[int, int]], + score_thresh: float, + nms_thresh: float, + topk_per_image: int, +): + """ + Call `fast_rcnn_inference_single_image` for all images. + + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 4) if doing + class-specific regression, or (Ri, 4) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. 
+ This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. + scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. + kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +def _log_classification_stats(pred_logits, gt_classes, prefix="fast_rcnn"): + """ + Log the classification metrics to EventStorage. + + Args: + pred_logits: Rx(K+1) logits. The last column is for background class. + gt_classes: R labels + """ + num_instances = gt_classes.numel() + if num_instances == 0: + return + pred_classes = pred_logits.argmax(dim=1) + bg_class_ind = pred_logits.shape[1] - 1 + + fg_inds = (gt_classes >= 0) & (gt_classes < bg_class_ind) + num_fg = fg_inds.nonzero().numel() + fg_gt_classes = gt_classes[fg_inds] + fg_pred_classes = pred_classes[fg_inds] + + num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() + num_accurate = (pred_classes == gt_classes).nonzero().numel() + fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() + + storage = get_event_storage() + storage.put_scalar(f"{prefix}/cls_accuracy", num_accurate / num_instances) + if num_fg > 0: + storage.put_scalar(f"{prefix}/fg_cls_accuracy", fg_num_accurate / num_fg) + storage.put_scalar(f"{prefix}/false_negative", num_false_negative / num_fg) + + +def fast_rcnn_inference_single_image( + boxes, + scores, + image_shape: Tuple[int, int], + score_thresh: float, + nms_thresh: float, + topk_per_image: int, +): + """ + Single-image inference. Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Args: + Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference`, but for only one image. + """ + valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // 4 + # Convert to Boxes to use the `clip` function ... + boxes = Boxes(boxes.reshape(-1, 4)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 + + # 1. Filter results based on detection scores. It can make NMS more efficient + # by filtering out low-confidence detections. + filter_mask = scores > score_thresh # R x K + # R' x 2. 
First column contains indices of the R predictions; + # Second column contains indices of classes. + filter_inds = filter_mask.nonzero() + if num_bbox_reg_classes == 1: + boxes = boxes[filter_inds[:, 0], 0] + else: + boxes = boxes[filter_mask] + scores = scores[filter_mask] + + # 2. Apply NMS for each class independently. + keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) + if topk_per_image >= 0: + keep = keep[:topk_per_image] + boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] + + result = Instances(image_shape) + result.pred_boxes = Boxes(boxes) + result.scores = scores + result.pred_classes = filter_inds[:, 1] + return result, filter_inds[:, 0] + + +class FastRCNNOutputs: + """ + An internal implementation that stores information about outputs of a Fast R-CNN head, + and provides methods that are used to decode the outputs of a Fast R-CNN head. + """ + + def __init__( + self, + box2box_transform, + pred_class_logits, + pred_proposal_deltas, + proposals, + smooth_l1_beta=0.0, + box_reg_loss_type="smooth_l1", + ): + """ + Args: + box2box_transform (Box2BoxTransform/Box2BoxTransformRotated): + box2box transform instance for proposal-to-detection transformations. + pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class + logits for all R predicted object instances. + Each row corresponds to a predicted object instance. + pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for + class-specific or class-agnostic regression. It stores the predicted deltas that + transform proposals into final box detections. + B is the box dimension (4 or 5). + When B is 4, each row is [dx, dy, dw, dh (, ....)]. + When B is 5, each row is [dx, dy, dw, dh, da (, ....)]. + proposals (list[Instances]): A list of N Instances, where Instances i stores the + proposals for image i, in the field "proposal_boxes". + When training, each Instances must have ground-truth labels + stored in the field "gt_classes" and "gt_boxes". + The total number of all instances must be equal to R. + smooth_l1_beta (float): The transition point between L1 and L2 loss in + the smooth L1 loss function. When set to 0, the loss becomes L1. When + set to +inf, the loss becomes constant 0. + box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou" + """ + self.box2box_transform = box2box_transform + self.num_preds_per_image = [len(p) for p in proposals] + self.pred_class_logits = pred_class_logits + self.pred_proposal_deltas = pred_proposal_deltas + self.smooth_l1_beta = smooth_l1_beta + self.box_reg_loss_type = box_reg_loss_type + + self.image_shapes = [x.image_size for x in proposals] + + if len(proposals): + box_type = type(proposals[0].proposal_boxes) + # cat(..., dim=0) concatenates over all images in the batch + self.proposals = box_type.cat([p.proposal_boxes for p in proposals]) + assert ( + not self.proposals.tensor.requires_grad + ), "Proposals should not require gradients!" + + # "gt_classes" exists if and only if training. But other gt fields may + # not necessarily exist in training for images that have no groundtruth. + if proposals[0].has("gt_classes"): + self.gt_classes = cat([p.gt_classes for p in proposals], dim=0) + + # If "gt_boxes" does not exist, the proposals must be all negative and + # should not be included in regression loss computation. + # Here we just use proposal_boxes as an arbitrary placeholder because its + # value won't be used in self.box_reg_loss(). 
+ gt_boxes = [ + p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes for p in proposals + ] + self.gt_boxes = box_type.cat(gt_boxes) + else: + self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device)) + self._no_instances = len(self.proposals) == 0 # no instances found + + def softmax_cross_entropy_loss(self): + """ + Deprecated + """ + _log_classification_stats(self.pred_class_logits, self.gt_classes) + return cross_entropy(self.pred_class_logits, self.gt_classes, reduction="mean") + + def box_reg_loss(self): + """ + Deprecated + """ + if self._no_instances: + return 0.0 * self.pred_proposal_deltas.sum() + + box_dim = self.proposals.tensor.size(1) # 4 or 5 + cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim + device = self.pred_proposal_deltas.device + + bg_class_ind = self.pred_class_logits.shape[1] - 1 + # Box delta loss is only computed between the prediction for the gt class k + # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions + # for non-gt classes and background. + # Empty fg_inds should produce a valid loss of zero because reduction=sum. + fg_inds = nonzero_tuple((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind))[0] + + if cls_agnostic_bbox_reg: + # pred_proposal_deltas only corresponds to foreground class for agnostic + gt_class_cols = torch.arange(box_dim, device=device) + else: + # pred_proposal_deltas for class k are located in columns [b * k : b * k + b], + # where b is the dimension of box representation (4 or 5) + # Note that compared to Detectron1, + # we do not perform bounding box regression for background classes. + gt_class_cols = box_dim * self.gt_classes[fg_inds, None] + torch.arange( + box_dim, device=device + ) + + if self.box_reg_loss_type == "smooth_l1": + gt_proposal_deltas = self.box2box_transform.get_deltas( + self.proposals.tensor, self.gt_boxes.tensor + ) + loss_box_reg = smooth_l1_loss( + self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], + gt_proposal_deltas[fg_inds], + self.smooth_l1_beta, + reduction="sum", + ) + elif self.box_reg_loss_type == "giou": + fg_pred_boxes = self.box2box_transform.apply_deltas( + self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], + self.proposals.tensor[fg_inds], + ) + loss_box_reg = giou_loss( + fg_pred_boxes, + self.gt_boxes.tensor[fg_inds], + reduction="sum", + ) + else: + raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'") + + loss_box_reg = loss_box_reg / self.gt_classes.numel() + return loss_box_reg + + def losses(self): + """ + Deprecated + """ + return {"loss_cls": self.softmax_cross_entropy_loss(), "loss_box_reg": self.box_reg_loss()} + + def predict_boxes(self): + """ + Deprecated + """ + pred = self.box2box_transform.apply_deltas(self.pred_proposal_deltas, self.proposals.tensor) + return pred.split(self.num_preds_per_image, dim=0) + + def predict_probs(self): + """ + Deprecated + """ + probs = F.softmax(self.pred_class_logits, dim=-1) + return probs.split(self.num_preds_per_image, dim=0) + + +class FastRCNNOutputLayers(nn.Module): + """ + Two linear layers for predicting Fast R-CNN outputs: + + 1. proposal-to-detection box regression deltas + 2. 
classification scores + """ + + @configurable + def __init__( + self, + input_shape: ShapeSpec, + *, + box2box_transform, + num_classes: int, + test_score_thresh: float = 0.0, + test_nms_thresh: float = 0.5, + test_topk_per_image: int = 100, + cls_agnostic_bbox_reg: bool = False, + smooth_l1_beta: float = 0.0, + box_reg_loss_type: str = "smooth_l1", + loss_weight: Union[float, Dict[str, float]] = 1.0, + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature to this module + box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): + num_classes (int): number of foreground classes + test_score_thresh (float): threshold to filter predictions results. + test_nms_thresh (float): NMS threshold for prediction results. + test_topk_per_image (int): number of top predictions to produce per image. + cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression + smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if + `box_reg_loss_type` is "smooth_l1" + box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou" + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all losses, or a dict of individual weightings. Valid dict keys are: + * "loss_cls": applied to classification loss + * "loss_box_reg": applied to box regression loss + """ + super().__init__() + if isinstance(input_shape, int): # some backward compatibility + input_shape = ShapeSpec(channels=input_shape) + self.num_classes = num_classes + input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) + # prediction layer for num_classes foreground classes and one background class (hence + 1) + self.cls_score = nn.Linear(input_size, num_classes + 1) + num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes + box_dim = len(box2box_transform.weights) + self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim) + + nn.init.normal_(self.cls_score.weight, std=0.01) + nn.init.normal_(self.bbox_pred.weight, std=0.001) + for l in [self.cls_score, self.bbox_pred]: + nn.init.constant_(l.bias, 0) + + self.box2box_transform = box2box_transform + self.smooth_l1_beta = smooth_l1_beta + self.test_score_thresh = test_score_thresh + self.test_nms_thresh = test_nms_thresh + self.test_topk_per_image = test_topk_per_image + self.box_reg_loss_type = box_reg_loss_type + if isinstance(loss_weight, float): + loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight} + self.loss_weight = loss_weight + + @classmethod + def from_config(cls, cfg, input_shape): + return { + "input_shape": input_shape, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), + # fmt: off + "num_classes" : cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "cls_agnostic_bbox_reg" : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, + "smooth_l1_beta" : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, + "test_score_thresh" : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, + "test_nms_thresh" : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + "test_topk_per_image" : cfg.TEST.DETECTIONS_PER_IMAGE, + "box_reg_loss_type" : cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE, + "loss_weight" : {"loss_box_reg": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT}, + # fmt: on + } + + def forward(self, x): + """ + Args: + x: per-region features of shape (N, ...) for N bounding boxes to predict. + + Returns: + (Tensor, Tensor): + First tensor: shape (N,K+1), scores for each of the N box. 
Each row contains the + scores for K object categories and 1 background class. + + Second tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4), + or (N,4) for class-agnostic regression. + """ + if x.dim() > 2: + x = torch.flatten(x, start_dim=1) + scores = self.cls_score(x) + proposal_deltas = self.bbox_pred(x) + return scores, proposal_deltas + + def losses(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``, + ``gt_classes`` are expected. + + Returns: + Dict[str, Tensor]: dict of losses + """ + scores, proposal_deltas = predictions + + # parse classification outputs + gt_classes = ( + cat([p.gt_classes for p in proposals], dim=0) if len(proposals) else torch.empty(0) + ) + _log_classification_stats(scores, gt_classes) + + # parse box regression outputs + if len(proposals): + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) # Nx4 + assert not proposal_boxes.requires_grad, "Proposals should not require gradients!" + # If "gt_boxes" does not exist, the proposals must be all negative and + # should not be included in regression loss computation. + # Here we just use proposal_boxes as an arbitrary placeholder because its + # value won't be used in self.box_reg_loss(). + gt_boxes = cat( + [(p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes).tensor for p in proposals], + dim=0, + ) + else: + proposal_boxes = gt_boxes = torch.empty((0, 4), device=proposal_deltas.device) + + losses = { + "loss_cls": cross_entropy(scores, gt_classes, reduction="mean"), + "loss_box_reg": self.box_reg_loss( + proposal_boxes, gt_boxes, proposal_deltas, gt_classes + ), + } + return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + + def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes): + """ + Args: + All boxes are tensors with the same shape Rx(4 or 5). + gt_classes is a long tensor of shape R, the gt class label of each proposal. + R shall be the number of proposals. + """ + box_dim = proposal_boxes.shape[1] # 4 or 5 + # Regression loss is only computed for foreground proposals (those matched to a GT) + fg_inds = nonzero_tuple((gt_classes >= 0) & (gt_classes < self.num_classes))[0] + if pred_deltas.shape[1] == box_dim: # cls-agnostic regression + fg_pred_deltas = pred_deltas[fg_inds] + else: + fg_pred_deltas = pred_deltas.view(-1, self.num_classes, box_dim)[ + fg_inds, gt_classes[fg_inds] + ] + + if self.box_reg_loss_type == "smooth_l1": + gt_pred_deltas = self.box2box_transform.get_deltas( + proposal_boxes[fg_inds], + gt_boxes[fg_inds], + ) + loss_box_reg = smooth_l1_loss( + fg_pred_deltas, gt_pred_deltas, self.smooth_l1_beta, reduction="sum" + ) + elif self.box_reg_loss_type == "giou": + fg_pred_boxes = self.box2box_transform.apply_deltas( + fg_pred_deltas, proposal_boxes[fg_inds] + ) + loss_box_reg = giou_loss(fg_pred_boxes, gt_boxes[fg_inds], reduction="sum") + else: + raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'") + # The reg loss is normalized using the total number of regions (R), not the number + # of foreground regions even though the box regression loss is only defined on + # foreground regions. Why? Because doing so gives equal training influence to + # each foreground example. 
To see how, consider two different minibatches: + # (1) Contains a single foreground region + # (2) Contains 100 foreground regions + # If we normalize by the number of foreground regions, the single example in + # minibatch (1) will be given 100 times as much influence as each foreground + # example in minibatch (2). Normalizing by the total number of regions, R, + # means that the single example in minibatch (1) and each of the 100 examples + # in minibatch (2) are given equal influence. + return loss_box_reg / max(gt_classes.numel(), 1.0) # return 0 if empty + + def inference(self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Instances]: same as `fast_rcnn_inference`. + list[Tensor]: same as `fast_rcnn_inference`. + """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + return fast_rcnn_inference( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + def predict_boxes_for_gt_classes(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_classes`` are expected. + + Returns: + list[Tensor]: + A list of Tensors of predicted boxes for GT classes in case of + class-specific box head. Element i of the list has shape (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + scores, proposal_deltas = predictions + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) + N, B = proposal_boxes.shape + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, proposal_boxes + ) # Nx(KxB) + + K = predict_boxes.shape[1] // B + if K > 1: + gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0) + # Some proposals are ignored or have a background class. Their gt_classes + # cannot be used as index. + gt_classes = gt_classes.clamp_(0, K - 1) + + predict_boxes = predict_boxes.view(N, K, B)[ + torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes + ] + num_prop_per_image = [len(p) for p in proposals] + return predict_boxes.split(num_prop_per_image) + + def predict_boxes( + self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] + ): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Tensor]: + A list of Tensors of predicted class-specific or class-agnostic boxes + for each image. 
Element i has shape (Ri, K * B) or (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + _, proposal_deltas = predictions + num_prop_per_image = [len(p) for p in proposals] + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, + proposal_boxes, + ) # Nx(KxB) + return predict_boxes.split(num_prop_per_image) + + def predict_probs( + self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] + ): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. + + Returns: + list[Tensor]: + A list of Tensors of predicted class probabilities for each image. + Element i has shape (Ri, K + 1), where Ri is the number of proposals for image i. + """ + scores, _ = predictions + num_inst_per_image = [len(p) for p in proposals] + probs = F.softmax(scores, dim=-1) + return probs.split(num_inst_per_image, dim=0) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/keypoint_head.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/keypoint_head.py new file mode 100644 index 0000000000000000000000000000000000000000..e0acc138e72fcb188e4ffb3d156358b8ca59babf --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/keypoint_head.py @@ -0,0 +1,272 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ConvTranspose2d, cat, interpolate +from detectron2.structures import Instances, heatmaps_to_keypoints +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +_TOTAL_SKIPPED = 0 + + +__all__ = [ + "ROI_KEYPOINT_HEAD_REGISTRY", + "build_keypoint_head", + "BaseKeypointRCNNHead", + "KRCNNConvDeconvUpsampleHead", +] + + +ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") +ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ +Registry for keypoint heads, which make keypoint predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def build_keypoint_head(cfg, input_shape): + """ + Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME + return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): + """ + Arguments: + pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number + of instances in the batch, K is the number of keypoints, and S is the side length + of the keypoint heatmap. The values are spatial logits. + instances (list[Instances]): A list of M Instances, where M is the batch size. + These instances are predictions from the model + that are in 1:1 correspondence with pred_keypoint_logits. + Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` + instance. + normalizer (float): Normalize the loss by this amount. + If not specified, we normalize by the number of visible keypoints in the minibatch. + + Returns a scalar tensor containing the loss. 
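# --- Minimal sketch (random toy tensors, visibility chosen by hand) of the loss this function
# --- implements: each visible ground-truth keypoint becomes a single target index in an S*S
# --- spatial grid, and the heatmap logits are scored with softmax cross-entropy over that grid.
import torch
import torch.nn.functional as F

N, K, S = 2, 17, 56                              # instances, keypoints, heatmap side length
pred_logits = torch.randn(N * K, S * S)          # flattened (N*K, S*S) spatial logits
targets = torch.randint(0, S * S, (N * K,))      # index of the gt keypoint within the grid
visible = torch.arange(N * K) % 2 == 0           # pretend half the keypoints are visible

loss = F.cross_entropy(pred_logits[visible], targets[visible], reduction="sum")
loss = loss / visible.sum()                      # normalize by the number of visible keypoints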
+ """ + heatmaps = [] + valid = [] + + keypoint_side_len = pred_keypoint_logits.shape[2] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + keypoints = instances_per_image.gt_keypoints + heatmaps_per_image, valid_per_image = keypoints.to_heatmap( + instances_per_image.proposal_boxes.tensor, keypoint_side_len + ) + heatmaps.append(heatmaps_per_image.view(-1)) + valid.append(valid_per_image.view(-1)) + + if len(heatmaps): + keypoint_targets = cat(heatmaps, dim=0) + valid = cat(valid, dim=0).to(dtype=torch.uint8) + valid = torch.nonzero(valid).squeeze(1) + + # torch.mean (in binary_cross_entropy_with_logits) doesn't + # accept empty tensors, so handle it separately + if len(heatmaps) == 0 or valid.numel() == 0: + global _TOTAL_SKIPPED + _TOTAL_SKIPPED += 1 + storage = get_event_storage() + storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) + return pred_keypoint_logits.sum() * 0 + + N, K, H, W = pred_keypoint_logits.shape + pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) + + keypoint_loss = F.cross_entropy( + pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" + ) + + # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch + if normalizer is None: + normalizer = valid.numel() + keypoint_loss /= normalizer + + return keypoint_loss + + +def keypoint_rcnn_inference(pred_keypoint_logits: torch.Tensor, pred_instances: List[Instances]): + """ + Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) + and add it to the `pred_instances` as a `pred_keypoints` field. + + Args: + pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number + of instances in the batch, K is the number of keypoints, and S is the side length of + the keypoint heatmap. The values are spatial logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images. + + Returns: + None. Each element in pred_instances will contain extra "pred_keypoints" and + "pred_keypoint_heatmaps" fields. "pred_keypoints" is a tensor of shape + (#instance, K, 3) where the last dimension corresponds to (x, y, score). + The scores are larger than 0. "pred_keypoint_heatmaps" contains the raw + keypoint logits as passed to this function. + """ + # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) + bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) + + pred_keypoint_logits = pred_keypoint_logits.detach() + keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits, bboxes_flat.detach()) + num_instances_per_image = [len(i) for i in pred_instances] + keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) + heatmap_results = pred_keypoint_logits.split(num_instances_per_image, dim=0) + + for keypoint_results_per_image, heatmap_results_per_image, instances_per_image in zip( + keypoint_results, heatmap_results, pred_instances + ): + # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) + # heatmap_results_per_image is (num instances)x(num keypoints)x(side)x(side) + instances_per_image.pred_keypoints = keypoint_results_per_image + instances_per_image.pred_keypoint_heatmaps = heatmap_results_per_image + + +class BaseKeypointRCNNHead(nn.Module): + """ + Implement the basic Keypoint R-CNN losses and inference logic described in + Sec. 5 of :paper:`Mask R-CNN`. 
+ """ + + @configurable + def __init__(self, *, num_keypoints, loss_weight=1.0, loss_normalizer=1.0): + """ + NOTE: this interface is experimental. + + Args: + num_keypoints (int): number of keypoints to predict + loss_weight (float): weight to multiple on the keypoint loss + loss_normalizer (float or str): + If float, divide the loss by `loss_normalizer * #images`. + If 'visible', the loss is normalized by the total number of + visible keypoints across images. + """ + super().__init__() + self.num_keypoints = num_keypoints + self.loss_weight = loss_weight + assert loss_normalizer == "visible" or isinstance(loss_normalizer, float), loss_normalizer + self.loss_normalizer = loss_normalizer + + @classmethod + def from_config(cls, cfg, input_shape): + ret = { + "loss_weight": cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT, + "num_keypoints": cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS, + } + normalize_by_visible = ( + cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS + ) # noqa + if not normalize_by_visible: + batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE + positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION + ret["loss_normalizer"] = ( + ret["num_keypoints"] * batch_size_per_image * positive_sample_fraction + ) + else: + ret["loss_normalizer"] = "visible" + return ret + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input 4D region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses if in training. The predicted "instances" if in inference. + """ + x = self.layers(x) + if self.training: + num_images = len(instances) + normalizer = ( + None if self.loss_normalizer == "visible" else num_images * self.loss_normalizer + ) + return { + "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) + * self.loss_weight + } + else: + keypoint_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from regional input features. + """ + raise NotImplementedError + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_KEYPOINT_HEAD_REGISTRY.register() +class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead, nn.Sequential): + """ + A standard keypoint head containing a series of 3x3 convs, followed by + a transpose convolution and bilinear interpolation for upsampling. + It is described in Sec. 5 of :paper:`Mask R-CNN`. + """ + + @configurable + def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature + conv_dims: an iterable of output channel counts for each conv in the head + e.g. (512, 512, 512) for three convs outputting 512 channels. 
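+
+        Example (illustrative sketch only; ``ShapeSpec`` comes from
+        ``detectron2.layers`` and the channel / keypoint counts below are assumed
+        values, not taken from any particular config)::
+
+            head = KRCNNConvDeconvUpsampleHead(
+                ShapeSpec(channels=256, height=14, width=14),
+                num_keypoints=17,
+                conv_dims=(512, 512, 512),
+            )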
+ """ + super().__init__(num_keypoints=num_keypoints, **kwargs) + + # default up_scale to 2.0 (this can be made an option) + up_scale = 2.0 + in_channels = input_shape.channels + + for idx, layer_channels in enumerate(conv_dims, 1): + module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) + self.add_module("conv_fcn{}".format(idx), module) + self.add_module("conv_fcn_relu{}".format(idx), nn.ReLU()) + in_channels = layer_channels + + deconv_kernel = 4 + self.score_lowres = ConvTranspose2d( + in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 + ) + self.up_scale = up_scale + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret["input_shape"] = input_shape + ret["conv_dims"] = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS + return ret + + def layers(self, x): + for layer in self: + x = layer(x) + x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) + return x diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/mask_head.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/mask_head.py new file mode 100644 index 0000000000000000000000000000000000000000..5ac5c4b9aaa34653d6c50e512a5a4300da450c7f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/mask_head.py @@ -0,0 +1,292 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm +from detectron2.structures import Instances +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +__all__ = [ + "BaseMaskRCNNHead", + "MaskRCNNConvUpsampleHead", + "build_mask_head", + "ROI_MASK_HEAD_REGISTRY", +] + + +ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") +ROI_MASK_HEAD_REGISTRY.__doc__ = """ +Registry for mask heads, which predicts instance masks given +per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +@torch.jit.unused +def mask_rcnn_loss(pred_mask_logits: torch.Tensor, instances: List[Instances], vis_period: int = 0): + """ + Compute the mask prediction loss defined in the Mask R-CNN paper. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. These instances are in 1:1 + correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, + ...) associated with each instance are stored in fields. + vis_period (int): the period (in steps) to dump visualization. + + Returns: + mask_loss (Tensor): A scalar tensor containing the loss. 
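+
+    Example (illustrative sketch only; assumes `pred_mask_logits` and the
+    foreground `instances` are produced by a mask head as described above)::
+
+        loss = mask_rcnn_loss(pred_mask_logits, instances, vis_period=0)
+        losses = {"loss_mask": loss}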
+ """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + total_num_masks = pred_mask_logits.size(0) + mask_side_len = pred_mask_logits.size(2) + assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" + + gt_classes = [] + gt_masks = [] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + if not cls_agnostic_mask: + gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) + gt_classes.append(gt_classes_per_image) + + gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( + instances_per_image.proposal_boxes.tensor, mask_side_len + ).to(device=pred_mask_logits.device) + # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len + gt_masks.append(gt_masks_per_image) + + if len(gt_masks) == 0: + return pred_mask_logits.sum() * 0 + + gt_masks = cat(gt_masks, dim=0) + + if cls_agnostic_mask: + pred_mask_logits = pred_mask_logits[:, 0] + else: + indices = torch.arange(total_num_masks) + gt_classes = cat(gt_classes, dim=0) + pred_mask_logits = pred_mask_logits[indices, gt_classes] + + if gt_masks.dtype == torch.bool: + gt_masks_bool = gt_masks + else: + # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) + gt_masks_bool = gt_masks > 0.5 + gt_masks = gt_masks.to(dtype=torch.float32) + + # Log the training accuracy (using gt classes and 0.5 threshold) + mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool + mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) + num_positive = gt_masks_bool.sum().item() + false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( + gt_masks_bool.numel() - num_positive, 1.0 + ) + false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) + + storage = get_event_storage() + storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) + storage.put_scalar("mask_rcnn/false_positive", false_positive) + storage.put_scalar("mask_rcnn/false_negative", false_negative) + if vis_period > 0 and storage.iter % vis_period == 0: + pred_masks = pred_mask_logits.sigmoid() + vis_masks = torch.cat([pred_masks, gt_masks], axis=2) + name = "Left: mask prediction; Right: mask GT" + for idx, vis_mask in enumerate(vis_masks): + vis_mask = torch.stack([vis_mask] * 3, axis=0) + storage.put_image(name + f" ({idx})", vis_mask) + + mask_loss = F.binary_cross_entropy_with_logits(pred_mask_logits, gt_masks, reduction="mean") + return mask_loss + + +def mask_rcnn_inference(pred_mask_logits: torch.Tensor, pred_instances: List[Instances]): + """ + Convert pred_mask_logits to estimated foreground probability masks while also + extracting only the masks for the predicted classes in pred_instances. For each + predicted box, the mask of the same class is attached to the instance by adding a + new "pred_masks" field to pred_instances. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. Each Instances must have field "pred_classes". + + Returns: + None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, + Wmask) for predicted class. 
Note that the masks are returned as a soft (non-quantized) + masks the resolution predicted by the network; post-processing steps, such as resizing + the predicted masks to the original image resolution and/or binarizing them, is left + to the caller. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + + if cls_agnostic_mask: + mask_probs_pred = pred_mask_logits.sigmoid() + else: + # Select masks corresponding to the predicted classes + num_masks = pred_mask_logits.shape[0] + class_pred = cat([i.pred_classes for i in pred_instances]) + indices = torch.arange(num_masks, device=class_pred.device) + mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() + # mask_probs_pred.shape: (B, 1, Hmask, Wmask) + + num_boxes_per_image = [len(i) for i in pred_instances] + mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) + + for prob, instances in zip(mask_probs_pred, pred_instances): + instances.pred_masks = prob # (1, Hmask, Wmask) + + +class BaseMaskRCNNHead(nn.Module): + """ + Implement the basic Mask R-CNN losses and inference logic described in :paper:`Mask R-CNN` + """ + + @configurable + def __init__(self, *, loss_weight: float = 1.0, vis_period: int = 0): + """ + NOTE: this interface is experimental. + + Args: + loss_weight (float): multiplier of the loss + vis_period (int): visualization period + """ + super().__init__() + self.vis_period = vis_period + self.loss_weight = loss_weight + + @classmethod + def from_config(cls, cfg, input_shape): + return {"vis_period": cfg.VIS_PERIOD} + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses in training. The predicted "instances" in inference. + """ + x = self.layers(x) + if self.training: + return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period) * self.loss_weight} + else: + mask_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from input features. + """ + raise NotImplementedError + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_MASK_HEAD_REGISTRY.register() +class MaskRCNNConvUpsampleHead(BaseMaskRCNNHead, nn.Sequential): + """ + A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). + Predictions are made with a final 1x1 conv layer. + """ + + @configurable + def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature + num_classes (int): the number of foreground classes (i.e. background is not + included). 1 if using class agnostic prediction. + conv_dims (list[int]): a list of N>0 integers representing the output dimensions + of N-1 conv layers and the last upsample layer. + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. 
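+
+        Example (illustrative sketch only; the channel counts and class count
+        below are assumed values, not taken from any particular config)::
+
+            head = MaskRCNNConvUpsampleHead(
+                ShapeSpec(channels=256, height=14, width=14),
+                num_classes=80,
+                conv_dims=[256, 256, 256, 256, 256],  # 4 convs + 1 final deconv dim
+            )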
+ """ + super().__init__(**kwargs) + assert len(conv_dims) >= 1, "conv_dims have to be non-empty!" + + self.conv_norm_relus = [] + + cur_channels = input_shape.channels + for k, conv_dim in enumerate(conv_dims[:-1]): + conv = Conv2d( + cur_channels, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=nn.ReLU(), + ) + self.add_module("mask_fcn{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + cur_channels = conv_dim + + self.deconv = ConvTranspose2d( + cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0 + ) + self.add_module("deconv_relu", nn.ReLU()) + cur_channels = conv_dims[-1] + + self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0) + + for layer in self.conv_norm_relus + [self.deconv]: + weight_init.c2_msra_fill(layer) + # use normal distribution initialization for mask prediction layer + nn.init.normal_(self.predictor.weight, std=0.001) + if self.predictor.bias is not None: + nn.init.constant_(self.predictor.bias, 0) + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM + num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV + ret.update( + conv_dims=[conv_dim] * (num_conv + 1), # +1 for ConvTranspose + conv_norm=cfg.MODEL.ROI_MASK_HEAD.NORM, + input_shape=input_shape, + ) + if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK: + ret["num_classes"] = 1 + else: + ret["num_classes"] = cfg.MODEL.ROI_HEADS.NUM_CLASSES + return ret + + def layers(self, x): + for layer in self: + x = layer(x) + return x + + +def build_mask_head(cfg, input_shape): + """ + Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_MASK_HEAD.NAME + return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/roi_heads.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/roi_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..64f5e7510ee0509f5db53d022d982380ce6cf886 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/roi_heads.py @@ -0,0 +1,870 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import inspect +import logging +import numpy as np +from typing import Dict, List, Optional, Tuple +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, nonzero_tuple +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +from ..backbone.resnet import BottleneckBlock, ResNet +from ..matcher import Matcher +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from ..sampling import subsample_labels +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .keypoint_head import build_keypoint_head +from .mask_head import build_mask_head + +ROI_HEADS_REGISTRY = Registry("ROI_HEADS") +ROI_HEADS_REGISTRY.__doc__ = """ +Registry for ROI heads in a generalized R-CNN model. +ROIHeads take feature maps and region proposals, and +perform per-region computation. + +The registered object will be called with `obj(cfg, input_shape)`. +The call is expected to return an :class:`ROIHeads`. 
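+
+Example (illustrative sketch only; ``MyROIHeads`` is a hypothetical subclass)::
+
+    @ROI_HEADS_REGISTRY.register()
+    class MyROIHeads(StandardROIHeads):
+        ...
+
+    # later selected via cfg.MODEL.ROI_HEADS.NAME = "MyROIHeads"
+    roi_heads = build_roi_heads(cfg, input_shape)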
+""" + +logger = logging.getLogger(__name__) + + +def build_roi_heads(cfg, input_shape): + """ + Build ROIHeads defined by `cfg.MODEL.ROI_HEADS.NAME`. + """ + name = cfg.MODEL.ROI_HEADS.NAME + return ROI_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +def select_foreground_proposals( + proposals: List[Instances], bg_label: int +) -> Tuple[List[Instances], List[torch.Tensor]]: + """ + Given a list of N Instances (for N images), each containing a `gt_classes` field, + return a list of Instances that contain only instances with `gt_classes != -1 && + gt_classes != bg_label`. + + Args: + proposals (list[Instances]): A list of N Instances, where N is the number of + images in the batch. + bg_label: label index of background class. + + Returns: + list[Instances]: N Instances, each contains only the selected foreground instances. + list[Tensor]: N boolean vector, correspond to the selection mask of + each Instances object. True for selected instances. + """ + assert isinstance(proposals, (list, tuple)) + assert isinstance(proposals[0], Instances) + assert proposals[0].has("gt_classes") + fg_proposals = [] + fg_selection_masks = [] + for proposals_per_image in proposals: + gt_classes = proposals_per_image.gt_classes + fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) + fg_idxs = fg_selection_mask.nonzero().squeeze(1) + fg_proposals.append(proposals_per_image[fg_idxs]) + fg_selection_masks.append(fg_selection_mask) + return fg_proposals, fg_selection_masks + + +def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: + """ + Args: + proposals (list[Instances]): a list of N Instances, where N is the + number of images. + + Returns: + proposals: only contains proposals with at least one visible keypoint. + + Note that this is still slightly different from Detectron. + In Detectron, proposals for training keypoint head are re-sampled from + all the proposals with IOU>threshold & >=1 visible keypoint. + + Here, the proposals are first sampled from all proposals with + IOU>threshold, then proposals with no visible keypoint are filtered out. + This strategy seems to make no difference on Detectron and is easier to implement. + """ + ret = [] + all_num_fg = [] + for proposals_per_image in proposals: + # If empty/unannotated image (hard negatives), skip filtering for train + if len(proposals_per_image) == 0: + ret.append(proposals_per_image) + continue + gt_keypoints = proposals_per_image.gt_keypoints.tensor + # #fg x K x 3 + vis_mask = gt_keypoints[:, :, 2] >= 1 + xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] + proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 + kp_in_box = ( + (xs >= proposal_boxes[:, :, 0]) + & (xs <= proposal_boxes[:, :, 2]) + & (ys >= proposal_boxes[:, :, 1]) + & (ys <= proposal_boxes[:, :, 3]) + ) + selection = (kp_in_box & vis_mask).any(dim=1) + selection_idxs = nonzero_tuple(selection)[0] + all_num_fg.append(selection_idxs.numel()) + ret.append(proposals_per_image[selection_idxs]) + + storage = get_event_storage() + storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) + return ret + + +class ROIHeads(torch.nn.Module): + """ + ROIHeads perform all per-region computation in an R-CNN. + + It typically contains logic to + + 1. (in training only) match proposals with ground truth and sample them + 2. crop the regions and extract per-region features using proposals + 3. 
make per-region predictions with different heads + + It can have many variants, implemented as subclasses of this class. + This base class contains the logic to match/sample proposals. + But it is not necessary to inherit this class if the sampling logic is not needed. + """ + + @configurable + def __init__( + self, + *, + num_classes, + batch_size_per_image, + positive_fraction, + proposal_matcher, + proposal_append_gt=True + ): + """ + NOTE: this interface is experimental. + + Args: + num_classes (int): number of foreground classes (i.e. background is not included) + batch_size_per_image (int): number of proposals to sample for training + positive_fraction (float): fraction of positive (foreground) proposals + to sample for training. + proposal_matcher (Matcher): matcher that matches proposals and ground truth + proposal_append_gt (bool): whether to include ground truth as proposals as well + """ + super().__init__() + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + self.num_classes = num_classes + self.proposal_matcher = proposal_matcher + self.proposal_append_gt = proposal_append_gt + + @classmethod + def from_config(cls, cfg): + return { + "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION, + "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT, + # Matcher to assign box proposals to gt boxes + "proposal_matcher": Matcher( + cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, + cfg.MODEL.ROI_HEADS.IOU_LABELS, + allow_low_quality_matches=False, + ), + } + + def _sample_proposals( + self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor, gt_classes: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Based on the matching between N proposals and M groundtruth, + sample the proposals and set their classification labels. + + Args: + matched_idxs (Tensor): a vector of length N, each is the best-matched + gt index in [0, M) for each proposal. + matched_labels (Tensor): a vector of length N, the matcher's label + (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. + gt_classes (Tensor): a vector of length M. + + Returns: + Tensor: a vector of indices of sampled proposals. Each is in [0, N). + Tensor: a vector of the same length, the classification label for + each sampled proposal. Each sample is labeled as either a category in + [0, num_classes) or the background (num_classes). + """ + has_gt = gt_classes.numel() > 0 + # Get the corresponding GT for each proposal + if has_gt: + gt_classes = gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[matched_labels == 0] = self.num_classes + # Label ignore proposals (-1 label) + gt_classes[matched_labels == -1] = -1 + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + + sampled_fg_idxs, sampled_bg_idxs = subsample_labels( + gt_classes, self.batch_size_per_image, self.positive_fraction, self.num_classes + ) + + sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) + return sampled_idxs, gt_classes[sampled_idxs] + + @torch.no_grad() + def label_and_sample_proposals( + self, proposals: List[Instances], targets: List[Instances] + ) -> List[Instances]: + """ + Prepare some proposals to be used to train the ROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. 
+ It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth + boxes, with a fraction of positives that is no larger than + ``self.positive_fraction``. + + Args: + See :meth:`ROIHeads.forward` + + Returns: + list[Instances]: + length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + + - proposal_boxes: the proposal boxes + - gt_boxes: the ground-truth box that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + + Other fields such as "gt_classes", "gt_masks", that's included in `targets`. + """ + gt_boxes = [x.gt_boxes for x in targets] + # Augment proposals with ground-truth boxes. + # In the case of learned proposals (e.g., RPN), when training starts + # the proposals will be low quality due to random initialization. + # It's possible that none of these initial + # proposals have high enough overlap with the gt objects to be used + # as positive examples for the second stage components (box head, + # cls head, mask head). Adding the gt boxes to the set of proposals + # ensures that the second stage components will have some positive + # examples from the start of training. For RPN, this augmentation improves + # convergence and empirically improves box AP on COCO by about 0.5 + # points (under one tested configuration). + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(gt_boxes, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + # Set target attributes of the sampled proposals: + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + # We index all the attributes of targets that start with "gt_" + # and have not been added to proposals yet (="gt_classes"). + # NOTE: here the indexing waste some compute, because heads + # like masks, keypoints, etc, will filter the proposals again, + # (by foreground/background, or number of keypoints in the image, etc) + # so we essentially index the data twice. + for (trg_name, trg_value) in targets_per_image.get_fields().items(): + if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): + proposals_per_image.set(trg_name, trg_value[sampled_targets]) + # If no GT is given in the image, we don't know what a dummy gt value can be. + # Therefore the returned proposals won't have any gt_* fields, except for a + # gt_classes full of background label. 
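+            # (Illustrative summary, not exhaustive: each sampled proposal now has
+            # "proposal_boxes" and "gt_classes", plus -- when the image has GT --
+            # the matched "gt_boxes" / "gt_masks" / "gt_keypoints" copied from `targets`.)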
+ + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + Args: + images (ImageList): + features (dict[str,Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + proposals (list[Instances]): length `N` list of `Instances`. The i-th + `Instances` contains object proposals for the i-th input image, + with fields "proposal_boxes" and "objectness_logits". + targets (list[Instances], optional): length `N` list of `Instances`. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. Specify `targets` during training only. + It may have the following fields: + + - gt_boxes: the bounding box of each instance. + - gt_classes: the label for each instance with a category ranging in [0, #class]. + - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. + - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. + + Returns: + list[Instances]: length `N` list of `Instances` containing the + detected instances. Returned during inference only; may be [] during training. + + dict[str->Tensor]: + mapping from a named loss to a tensor storing the loss. Used during training only. + """ + raise NotImplementedError() + + +@ROI_HEADS_REGISTRY.register() +class Res5ROIHeads(ROIHeads): + """ + The ROIHeads in a typical "C4" R-CNN model, where + the box and mask head share the cropping and + the per-region feature computation by a Res5 block. + See :paper:`ResNet` Appendix A. + """ + + @configurable + def __init__( + self, + *, + in_features: List[str], + pooler: ROIPooler, + res5: nn.Module, + box_predictor: nn.Module, + mask_head: Optional[nn.Module] = None, + **kwargs + ): + """ + NOTE: this interface is experimental. + + Args: + in_features (list[str]): list of backbone feature map names to use for + feature extraction + pooler (ROIPooler): pooler to extra region features from backbone + res5 (nn.Sequential): a CNN to compute per-region features, to be used by + ``box_predictor`` and ``mask_head``. Typically this is a "res5" + block from a ResNet. + box_predictor (nn.Module): make box predictions from the feature. + Should have the same interface as :class:`FastRCNNOutputLayers`. 
+ mask_head (nn.Module): transform features to make mask predictions + """ + super().__init__(**kwargs) + self.in_features = in_features + self.pooler = pooler + if isinstance(res5, (list, tuple)): + res5 = nn.Sequential(*res5) + self.res5 = res5 + self.box_predictor = box_predictor + self.mask_on = mask_head is not None + if self.mask_on: + self.mask_head = mask_head + + @classmethod + def from_config(cls, cfg, input_shape): + # fmt: off + ret = super().from_config(cfg) + in_features = ret["in_features"] = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + pooler_scales = (1.0 / input_shape[in_features[0]].stride, ) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + mask_on = cfg.MODEL.MASK_ON + # fmt: on + assert not cfg.MODEL.KEYPOINT_ON + assert len(in_features) == 1 + + ret["pooler"] = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + + # Compatbility with old moco code. Might be useful. + # See notes in StandardROIHeads.from_config + if not inspect.ismethod(cls._build_res5_block): + logger.warning( + "The behavior of _build_res5_block may change. " + "Please do not depend on private methods." + ) + cls._build_res5_block = classmethod(cls._build_res5_block) + + ret["res5"], out_channels = cls._build_res5_block(cfg) + ret["box_predictor"] = FastRCNNOutputLayers( + cfg, ShapeSpec(channels=out_channels, height=1, width=1) + ) + + if mask_on: + ret["mask_head"] = build_mask_head( + cfg, + ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution), + ) + return ret + + @classmethod + def _build_res5_block(cls, cfg): + # fmt: off + stage_channel_factor = 2 ** 3 # res5 is 8x res2 + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group * stage_channel_factor + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + norm = cfg.MODEL.RESNETS.NORM + assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ + "Deformable conv is not yet supported in res5 head." + # fmt: on + + blocks = ResNet.make_stage( + BottleneckBlock, + 3, + stride_per_block=[2, 1, 1], + in_channels=out_channels // 2, + bottleneck_channels=bottleneck_channels, + out_channels=out_channels, + num_groups=num_groups, + norm=norm, + stride_in_1x1=stride_in_1x1, + ) + return nn.Sequential(*blocks), out_channels + + def _shared_roi_transform(self, features, boxes): + x = self.pooler(features, boxes) + return self.res5(x) + + def forward(self, images, features, proposals, targets=None): + """ + See :meth:`ROIHeads.forward`. + """ + del images + + if self.training: + assert targets + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + proposal_boxes = [x.proposal_boxes for x in proposals] + box_features = self._shared_roi_transform( + [features[f] for f in self.in_features], proposal_boxes + ) + predictions = self.box_predictor(box_features.mean(dim=[2, 3])) + + if self.training: + del features + losses = self.box_predictor.losses(predictions, proposals) + if self.mask_on: + proposals, fg_selection_masks = select_foreground_proposals( + proposals, self.num_classes + ) + # Since the ROI feature transform is shared between boxes and masks, + # we don't need to recompute features. 
The mask loss is only defined + # on foreground proposals, so we need to select out the foreground + # features. + mask_features = box_features[torch.cat(fg_selection_masks, dim=0)] + del box_features + losses.update(self.mask_head(mask_features, proposals)) + return [], losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes(self, features, instances): + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + instances (Instances): + the same `Instances` object, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + if self.mask_on: + features = [features[f] for f in self.in_features] + x = self._shared_roi_transform(features, [x.pred_boxes for x in instances]) + return self.mask_head(x, instances) + else: + return instances + + +@ROI_HEADS_REGISTRY.register() +class StandardROIHeads(ROIHeads): + """ + It's "standard" in a sense that there is no ROI transform sharing + or feature sharing between tasks. + Each head independently processes the input features by each head's + own pooler and head. + + This class is used by most models, such as FPN and C5. + To implement more models, you can subclass it and implement a different + :meth:`forward()` or a head. + """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_head: nn.Module, + box_predictor: nn.Module, + mask_in_features: Optional[List[str]] = None, + mask_pooler: Optional[ROIPooler] = None, + mask_head: Optional[nn.Module] = None, + keypoint_in_features: Optional[List[str]] = None, + keypoint_pooler: Optional[ROIPooler] = None, + keypoint_head: Optional[nn.Module] = None, + train_on_pred_boxes: bool = False, + **kwargs + ): + """ + NOTE: this interface is experimental. + + Args: + box_in_features (list[str]): list of feature names to use for the box head. + box_pooler (ROIPooler): pooler to extra region features for box head + box_head (nn.Module): transform features to make box predictions + box_predictor (nn.Module): make box predictions from the feature. + Should have the same interface as :class:`FastRCNNOutputLayers`. + mask_in_features (list[str]): list of feature names to use for the mask + pooler or mask head. None if not using mask head. + mask_pooler (ROIPooler): pooler to extract region features from image features. + The mask head will then take region features to make predictions. + If None, the mask head will directly take the dict of image features + defined by `mask_in_features` + mask_head (nn.Module): transform features to make mask predictions + keypoint_in_features, keypoint_pooler, keypoint_head: similar to ``mask_*``. + train_on_pred_boxes (bool): whether to use proposal boxes or + predicted boxes from the box head to train other heads. 
+ """ + super().__init__(**kwargs) + # keep self.in_features for backward compatibility + self.in_features = self.box_in_features = box_in_features + self.box_pooler = box_pooler + self.box_head = box_head + self.box_predictor = box_predictor + + self.mask_on = mask_in_features is not None + if self.mask_on: + self.mask_in_features = mask_in_features + self.mask_pooler = mask_pooler + self.mask_head = mask_head + + self.keypoint_on = keypoint_in_features is not None + if self.keypoint_on: + self.keypoint_in_features = keypoint_in_features + self.keypoint_pooler = keypoint_pooler + self.keypoint_head = keypoint_head + + self.train_on_pred_boxes = train_on_pred_boxes + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg) + ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES + # Subclasses that have not been updated to use from_config style construction + # may have overridden _init_*_head methods. In this case, those overridden methods + # will not be classmethods and we need to avoid trying to call them here. + # We test for this with ismethod which only returns True for bound methods of cls. + # Such subclasses will need to handle calling their overridden _init_*_head methods. + if inspect.ismethod(cls._init_box_head): + ret.update(cls._init_box_head(cfg, input_shape)) + if inspect.ismethod(cls._init_mask_head): + ret.update(cls._init_mask_head(cfg, input_shape)) + if inspect.ismethod(cls._init_keypoint_head): + ret.update(cls._init_keypoint_head(cfg, input_shape)) + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + # Here we split "box head" and "box predictor", which is mainly due to historical reasons. + # They are used together so the "box predictor" layers should be part of the "box head". + # New subclasses of ROIHeads do not need "box predictor"s. 
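+        # (Illustrative note: with a typical FPN backbone, `in_features` would be
+        # e.g. ["p2", "p3", "p4", "p5"]; the shared box head built below therefore
+        # requires all of them to have the same channel count, which is what the
+        # assert on `in_channels` above enforces.)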
+ box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @classmethod + def _init_mask_head(cls, cfg, input_shape): + if not cfg.MODEL.MASK_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"mask_in_features": in_features} + ret["mask_pooler"] = ( + ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + if pooler_type + else None + ) + if pooler_type: + shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + else: + shape = {f: input_shape[f] for f in in_features} + ret["mask_head"] = build_mask_head(cfg, shape) + return ret + + @classmethod + def _init_keypoint_head(cls, cfg, input_shape): + if not cfg.MODEL.KEYPOINT_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) # noqa + sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"keypoint_in_features": in_features} + ret["keypoint_pooler"] = ( + ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + if pooler_type + else None + ) + if pooler_type: + shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + else: + shape = {f: input_shape[f] for f in in_features} + ret["keypoint_head"] = build_keypoint_head(cfg, shape) + return ret + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + See :class:`ROIHeads.forward`. + """ + del images + if self.training: + assert targets, "'targets' argument is required during training" + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + if self.training: + losses = self._forward_box(features, proposals) + # Usually the original proposals used by the box head are used by the mask, keypoint + # heads. But when `self.train_on_pred_boxes is True`, proposals will contain boxes + # predicted by the box head. + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + # During inference cascaded prediction is used: the mask and keypoints heads are only + # applied to the top scoring box detections. 
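+            # (Illustrative note: `pred_instances` already carries "pred_boxes",
+            # "scores" and "pred_classes" at this point; forward_with_given_boxes()
+            # only adds fields such as "pred_masks" / "pred_keypoints" on top.)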
+ pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> List[Instances]: + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + This is useful for downstream tasks where a box is known, but need to obtain + other attributes (outputs of other heads). + Test-time augmentation also uses this. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + list[Instances]: + the same `Instances` objects, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + instances = self._forward_mask(features, instances) + instances = self._forward_keypoint(features, instances) + return instances + + def _forward_box(self, features: Dict[str, torch.Tensor], proposals: List[Instances]): + """ + Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, + the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + + Returns: + In training, a dict of losses. + In inference, a list of `Instances`, the predicted instances. + """ + features = [features[f] for f in self.box_in_features] + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + box_features = self.box_head(box_features) + predictions = self.box_predictor(box_features) + del box_features + + if self.training: + losses = self.box_predictor.losses(predictions, proposals) + # proposals is modified in-place below, so losses must be computed first. + if self.train_on_pred_boxes: + with torch.no_grad(): + pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( + predictions, proposals + ) + for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): + proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) + return losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + return pred_instances + + def _forward_mask(self, features: Dict[str, torch.Tensor], instances: List[Instances]): + """ + Forward logic of the mask prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict masks. + In training, they can be the proposals. + In inference, they can be the boxes predicted by R-CNN box head. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_masks" and return it. + """ + if not self.mask_on: + return {} if self.training else instances + + if self.training: + # head is only trained on positive proposals. 
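+            # (Illustrative note: select_foreground_proposals() keeps proposals whose
+            # gt_classes fall in [0, num_classes), i.e. it drops background and
+            # ignored proposals before the mask loss is computed.)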
+ instances, _ = select_foreground_proposals(instances, self.num_classes) + + if self.mask_pooler is not None: + features = [features[f] for f in self.mask_in_features] + boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] + features = self.mask_pooler(features, boxes) + else: + features = {f: features[f] for f in self.mask_in_features} + return self.mask_head(features, instances) + + def _forward_keypoint(self, features: Dict[str, torch.Tensor], instances: List[Instances]): + """ + Forward logic of the keypoint prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict keypoints. + In training, they can be the proposals. + In inference, they can be the boxes predicted by R-CNN box head. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_keypoints" and return it. + """ + if not self.keypoint_on: + return {} if self.training else instances + + if self.training: + # head is only trained on positive proposals with >=1 visible keypoints. + instances, _ = select_foreground_proposals(instances, self.num_classes) + instances = select_proposals_with_visible_keypoints(instances) + + if self.keypoint_pooler is not None: + features = [features[f] for f in self.keypoint_in_features] + boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] + features = self.keypoint_pooler(features, boxes) + else: + features = {f: features[f] for f in self.keypoint_in_features} + return self.keypoint_head(features, instances) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/rotated_fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..186ae03ffd9c575cc6e065a2b06651c947b9953b --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/roi_heads/rotated_fast_rcnn.py @@ -0,0 +1,271 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +import torch + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, batched_nms_rotated +from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransformRotated +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransformRotated`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). + + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. 
+ + pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals + to detection box predictions. + + gt_proposal_deltas: ground-truth rotated box2box transform deltas +""" + + +def fast_rcnn_inference_rotated( + boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image +): + """ + Call `fast_rcnn_inference_single_image_rotated` for all images. + + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 5) if doing + class-specific regression, or (Ri, 5) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. + This is compatible with the output of :meth:`FastRCNNOutputs.predict_boxes`. + scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputs.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. + kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image_rotated( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +def fast_rcnn_inference_single_image_rotated( + boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image +): + """ + Single-image inference. Return rotated bounding-box detection results by thresholding + on scores and applying rotated non-maximum suppression (Rotated NMS). + + Args: + Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference_rotated`, but for only one image. + """ + valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + B = 5 # box dimension + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // B + # Convert to Boxes to use the `clip` function ... + boxes = RotatedBoxes(boxes.reshape(-1, B)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B) # R x C x B + # Filter results based on detection scores + filter_mask = scores > score_thresh # R x K + # R' x 2. First column contains indices of the R predictions; + # Second column contains indices of classes. 
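+    # (Illustrative example: with R=2 proposals and K=3 classes, a filter_mask of
+    #  [[True, False, False], [False, True, True]] yields
+    #  filter_inds == [[0, 0], [1, 1], [1, 2]].)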
+ filter_inds = filter_mask.nonzero() + if num_bbox_reg_classes == 1: + boxes = boxes[filter_inds[:, 0], 0] + else: + boxes = boxes[filter_mask] + scores = scores[filter_mask] + + # Apply per-class Rotated NMS + keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh) + if topk_per_image >= 0: + keep = keep[:topk_per_image] + boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] + + result = Instances(image_shape) + result.pred_boxes = RotatedBoxes(boxes) + result.scores = scores + result.pred_classes = filter_inds[:, 1] + + return result, filter_inds[:, 0] + + +class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers): + """ + Two linear layers for predicting Rotated Fast R-CNN outputs. + """ + + @classmethod + def from_config(cls, cfg, input_shape): + args = super().from_config(cfg, input_shape) + args["box2box_transform"] = Box2BoxTransformRotated( + weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS + ) + return args + + def inference(self, predictions, proposals): + """ + Returns: + list[Instances]: same as `fast_rcnn_inference_rotated`. + list[Tensor]: same as `fast_rcnn_inference_rotated`. + """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + + return fast_rcnn_inference_rotated( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + +@ROI_HEADS_REGISTRY.register() +class RROIHeads(StandardROIHeads): + """ + This class is used by Rotated Fast R-CNN to detect rotated boxes. + For now, it only supports box predictions but not mask or keypoints. + """ + + @configurable + def __init__(self, **kwargs): + """ + NOTE: this interface is experimental. + """ + super().__init__(**kwargs) + assert ( + not self.mask_on and not self.keypoint_on + ), "Mask/Keypoints not supported in Rotated ROIHeads." + assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!" + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + assert pooler_type in ["ROIAlignRotated"], pooler_type + # assume all channel counts are equal + in_channels = [input_shape[f].channels for f in in_features][0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + # This line is the only difference v.s. StandardROIHeads + box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @torch.no_grad() + def label_and_sample_proposals(self, proposals, targets): + """ + Prepare some proposals to be used to train the RROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, + with a fraction of positives that is no larger than `self.positive_sample_fraction. 
+ + Args: + See :meth:`StandardROIHeads.forward` + + Returns: + list[Instances]: length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + - proposal_boxes: the rotated proposal boxes + - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + - gt_classes: the ground-truth classification lable for each proposal + """ + gt_boxes = [x.gt_boxes for x in targets] + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(gt_boxes, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou_rotated( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets] + + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/sampling.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..a2d0f6648b349c5ea39fd29785b77c961a58fa22 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/sampling.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch + +from detectron2.layers import nonzero_tuple + +__all__ = ["subsample_labels"] + + +def subsample_labels( + labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int +): + """ + Return `num_samples` (or fewer, if not enough found) + random samples from `labels` which is a mixture of positives & negatives. + It will try to return as many positives as possible without + exceeding `positive_fraction * num_samples`, and then try to + fill the remaining slots with negatives. + + Args: + labels (Tensor): (N, ) label vector with values: + * -1: ignore + * bg_label: background ("negative") class + * otherwise: one or more foreground ("positive") classes + num_samples (int): The total number of labels with value >= 0 to return. + Values that are not sampled will be filled with -1 (ignore). + positive_fraction (float): The number of subsampled labels with values > 0 + is `min(num_positives, int(positive_fraction * num_samples))`. The number + of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. + In order words, if there are not enough positives, the sample is filled with + negatives. 
If there are also not enough negatives, then as many elements are + sampled as is possible. + bg_label (int): label index of background ("negative") class. + + Returns: + pos_idx, neg_idx (Tensor): + 1D vector of indices. The total length of both is `num_samples` or fewer. + """ + positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] + negative = nonzero_tuple(labels == bg_label)[0] + + num_pos = int(num_samples * positive_fraction) + # protect against not enough positive examples + num_pos = min(positive.numel(), num_pos) + num_neg = num_samples - num_pos + # protect against not enough negative examples + num_neg = min(negative.numel(), num_neg) + + # randomly select positive and negative examples + perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + pos_idx = positive[perm1] + neg_idx = negative[perm2] + return pos_idx, neg_idx diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/test_time_augmentation.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/test_time_augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..373e6bf00a39c040ff1da49d6dcd39a54a0b69a7 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/modeling/test_time_augmentation.py @@ -0,0 +1,307 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import numpy as np +from contextlib import contextmanager +from itertools import count +from typing import List +import torch +from fvcore.transforms import HFlipTransform, NoOpTransform +from torch import nn +from torch.nn.parallel import DistributedDataParallel + +from detectron2.config import configurable +from detectron2.data.detection_utils import read_image +from detectron2.data.transforms import ( + RandomFlip, + ResizeShortestEdge, + ResizeTransform, + apply_augmentations, +) +from detectron2.structures import Boxes, Instances + +from .meta_arch import GeneralizedRCNN +from .postprocessing import detector_postprocess +from .roi_heads.fast_rcnn import fast_rcnn_inference_single_image + +__all__ = ["DatasetMapperTTA", "GeneralizedRCNNWithTTA"] + + +class DatasetMapperTTA: + """ + Implement test-time augmentation for detection data. + It is a callable which takes a dataset dict from a detection dataset, + and returns a list of dataset dicts where the images + are augmented from the input image by the transformations defined in the config. + This is used for test-time augmentation. + """ + + @configurable + def __init__(self, min_sizes: List[int], max_size: int, flip: bool): + """ + Args: + min_sizes: list of short-edge size to resize the image to + max_size: maximum height or width of resized images + flip: whether to apply flipping augmentation + """ + self.min_sizes = min_sizes + self.max_size = max_size + self.flip = flip + + @classmethod + def from_config(cls, cfg): + return { + "min_sizes": cfg.TEST.AUG.MIN_SIZES, + "max_size": cfg.TEST.AUG.MAX_SIZE, + "flip": cfg.TEST.AUG.FLIP, + } + + def __call__(self, dataset_dict): + """ + Args: + dict: a dict in standard model input format. See tutorials for details. + + Returns: + list[dict]: + a list of dicts, which contain augmented version of the input image. + The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. + Each dict has field "transforms" which is a TransformList, + containing the transforms that are used to generate this image. 
+ """ + numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy() + shape = numpy_image.shape + orig_shape = (dataset_dict["height"], dataset_dict["width"]) + if shape[:2] != orig_shape: + # It transforms the "original" image in the dataset to the input image + pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1]) + else: + pre_tfm = NoOpTransform() + + # Create all combinations of augmentations to use + aug_candidates = [] # each element is a list[Augmentation] + for min_size in self.min_sizes: + resize = ResizeShortestEdge(min_size, self.max_size) + aug_candidates.append([resize]) # resize only + if self.flip: + flip = RandomFlip(prob=1.0) + aug_candidates.append([resize, flip]) # resize + flip + + # Apply all the augmentations + ret = [] + for aug in aug_candidates: + new_image, tfms = apply_augmentations(aug, np.copy(numpy_image)) + torch_image = torch.from_numpy(np.ascontiguousarray(new_image.transpose(2, 0, 1))) + + dic = copy.deepcopy(dataset_dict) + dic["transforms"] = pre_tfm + tfms + dic["image"] = torch_image + ret.append(dic) + return ret + + +class GeneralizedRCNNWithTTA(nn.Module): + """ + A GeneralizedRCNN with test-time augmentation enabled. + Its :meth:`__call__` method has the same interface as :meth:`GeneralizedRCNN.forward`. + """ + + def __init__(self, cfg, model, tta_mapper=None, batch_size=3): + """ + Args: + cfg (CfgNode): + model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. + tta_mapper (callable): takes a dataset dict and returns a list of + augmented versions of the dataset dict. Defaults to + `DatasetMapperTTA(cfg)`. + batch_size (int): batch the augmented images into this batch size for inference. + """ + super().__init__() + if isinstance(model, DistributedDataParallel): + model = model.module + assert isinstance( + model, GeneralizedRCNN + ), "TTA is only supported on GeneralizedRCNN. Got a model of type {}".format(type(model)) + self.cfg = cfg.clone() + assert not self.cfg.MODEL.KEYPOINT_ON, "TTA for keypoint is not supported yet" + assert ( + not self.cfg.MODEL.LOAD_PROPOSALS + ), "TTA for pre-computed proposals is not supported yet" + + self.model = model + + if tta_mapper is None: + tta_mapper = DatasetMapperTTA(cfg) + self.tta_mapper = tta_mapper + self.batch_size = batch_size + + @contextmanager + def _turn_off_roi_heads(self, attrs): + """ + Open a context where some heads in `model.roi_heads` are temporarily turned off. + Args: + attr (list[str]): the attribute in `model.roi_heads` which can be used + to turn off a specific head, e.g., "mask_on", "keypoint_on". + """ + roi_heads = self.model.roi_heads + old = {} + for attr in attrs: + try: + old[attr] = getattr(roi_heads, attr) + except AttributeError: + # The head may not be implemented in certain ROIHeads + pass + + if len(old.keys()) == 0: + yield + else: + for attr in old.keys(): + setattr(roi_heads, attr, False) + yield + for attr in old.keys(): + setattr(roi_heads, attr, old[attr]) + + def _batch_inference(self, batched_inputs, detected_instances=None): + """ + Execute inference on a list of inputs, + using batch size = self.batch_size, instead of the length of the list. 
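+        The list is split into chunks of at most ``self.batch_size``; each chunk is run through
+        ``self.model.inference`` and the outputs are concatenated in the original order.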
+ + Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` + """ + if detected_instances is None: + detected_instances = [None] * len(batched_inputs) + + outputs = [] + inputs, instances = [], [] + for idx, input, instance in zip(count(), batched_inputs, detected_instances): + inputs.append(input) + instances.append(instance) + if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: + outputs.extend( + self.model.inference( + inputs, + instances if instances[0] is not None else None, + do_postprocess=False, + ) + ) + inputs, instances = [], [] + return outputs + + def __call__(self, batched_inputs): + """ + Same input/output format as :meth:`GeneralizedRCNN.forward` + """ + + def _maybe_read_image(dataset_dict): + ret = copy.copy(dataset_dict) + if "image" not in ret: + image = read_image(ret.pop("file_name"), self.model.input_format) + image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW + ret["image"] = image + if "height" not in ret and "width" not in ret: + ret["height"] = image.shape[1] + ret["width"] = image.shape[2] + return ret + + return [self._inference_one_image(_maybe_read_image(x)) for x in batched_inputs] + + def _inference_one_image(self, input): + """ + Args: + input (dict): one dataset dict with "image" field being a CHW tensor + + Returns: + dict: one output dict + """ + orig_shape = (input["height"], input["width"]) + augmented_inputs, tfms = self._get_augmented_inputs(input) + # Detect boxes from all augmented versions + with self._turn_off_roi_heads(["mask_on", "keypoint_on"]): + # temporarily disable roi heads + all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms) + # merge all detected boxes to obtain final predictions for boxes + merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape) + + if self.cfg.MODEL.MASK_ON: + # Use the detected boxes to obtain masks + augmented_instances = self._rescale_detected_boxes( + augmented_inputs, merged_instances, tfms + ) + # run forward on the detected boxes + outputs = self._batch_inference(augmented_inputs, augmented_instances) + # Delete now useless variables to avoid being out of memory + del augmented_inputs, augmented_instances + # average the predictions + merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms) + merged_instances = detector_postprocess(merged_instances, *orig_shape) + return {"instances": merged_instances} + else: + return {"instances": merged_instances} + + def _get_augmented_inputs(self, input): + augmented_inputs = self.tta_mapper(input) + tfms = [x.pop("transforms") for x in augmented_inputs] + return augmented_inputs, tfms + + def _get_augmented_boxes(self, augmented_inputs, tfms): + # 1: forward with all augmented images + outputs = self._batch_inference(augmented_inputs) + # 2: union the results + all_boxes = [] + all_scores = [] + all_classes = [] + for output, tfm in zip(outputs, tfms): + # Need to inverse the transforms on boxes, to obtain results on original image + pred_boxes = output.pred_boxes.tensor + original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy()) + all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device)) + + all_scores.extend(output.scores) + all_classes.extend(output.pred_classes) + all_boxes = torch.cat(all_boxes, dim=0) + return all_boxes, all_scores, all_classes + + def _merge_detections(self, all_boxes, all_scores, all_classes, shape_hw): + # select from the union of all results + num_boxes = 
len(all_boxes) + num_classes = self.cfg.MODEL.ROI_HEADS.NUM_CLASSES + # +1 because fast_rcnn_inference expects background scores as well + all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) + for idx, cls, score in zip(count(), all_classes, all_scores): + all_scores_2d[idx, cls] = score + + merged_instances, _ = fast_rcnn_inference_single_image( + all_boxes, + all_scores_2d, + shape_hw, + 1e-8, + self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + self.cfg.TEST.DETECTIONS_PER_IMAGE, + ) + + return merged_instances + + def _rescale_detected_boxes(self, augmented_inputs, merged_instances, tfms): + augmented_instances = [] + for input, tfm in zip(augmented_inputs, tfms): + # Transform the target box to the augmented image's coordinate space + pred_boxes = merged_instances.pred_boxes.tensor.cpu().numpy() + pred_boxes = torch.from_numpy(tfm.apply_box(pred_boxes)) + + aug_instances = Instances( + image_size=input["image"].shape[1:3], + pred_boxes=Boxes(pred_boxes), + pred_classes=merged_instances.pred_classes, + scores=merged_instances.scores, + ) + augmented_instances.append(aug_instances) + return augmented_instances + + def _reduce_pred_masks(self, outputs, tfms): + # Should apply inverse transforms on masks. + # We assume only resize & flip are used. pred_masks is a scale-invariant + # representation, so we handle flip specially + for output, tfm in zip(outputs, tfms): + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + output.pred_masks = output.pred_masks.flip(dims=[3]) + all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) + avg_pred_masks = torch.mean(all_pred_masks, dim=0) + return avg_pred_masks diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/FocalTransformer.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/FocalTransformer.py new file mode 100644 index 0000000000000000000000000000000000000000..1f2e8313f1b590e3bed462ded088865c41010a00 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/FocalTransformer.py @@ -0,0 +1,680 @@ +import torch +from torch import nn, Tensor +import torch.nn.functional as F +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +import math + + +class FocalTransformerBlock(nn.Module): + r""" Focal Transformer Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. + num_heads (int): Number of attention heads. + window_size (int): Window size. + expand_size (int): expand size at first focal level (finest level). + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + pool_method (str): window pooling method. Default: none, options: [none|fc|conv] + focal_level (int): number of focal levels. Default: 1. + focal_window (int): region size of focal attention. Default: 1 + use_layerscale (bool): whether use layer scale for training stability. 
Default: False + layerscale_value (float): scaling value for layer scale. Default: 1e-4 + """ + + def __init__(self, dim, input_resolution, num_heads, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="none", + focal_level=1, focal_window=1, use_layerscale=False, layerscale_value=1e-4): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.expand_size = expand_size + self.mlp_ratio = mlp_ratio + self.pool_method = pool_method + self.focal_level = focal_level + self.focal_window = focal_window + self.use_layerscale = use_layerscale + + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.expand_size = 0 + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.window_size_glo = self.window_size + + self.pool_layers = nn.ModuleList() + if self.pool_method != "none": + for k in range(self.focal_level-1): + window_size_glo = math.floor(self.window_size_glo / (2 ** k)) + if self.pool_method == "fc": + self.pool_layers.append(nn.Linear(window_size_glo * window_size_glo, 1)) + self.pool_layers[-1].weight.data.fill_(1./(window_size_glo * window_size_glo)) + self.pool_layers[-1].bias.data.fill_(0) + elif self.pool_method == "conv": + self.pool_layers.append(nn.Conv2d(dim, dim, kernel_size=window_size_glo, stride=window_size_glo, groups=dim)) + + self.norm1 = norm_layer(dim) + + self.attn = WindowAttention( + dim, expand_size=self.expand_size, window_size=(self.window_size,self.window_size), + focal_window=focal_window, focal_level=focal_level, num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop, pool_method=pool_method) + + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if self.shift_size > 0: + # calculate attention mask for SW-MSA + H, W = self.input_resolution + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + else: + attn_mask = None + self.register_buffer("attn_mask", attn_mask) + + if self.use_layerscale: + self.gamma_1 = nn.Parameter(layerscale_value * torch.ones((dim)), requires_grad=True) + self.gamma_2 = nn.Parameter(layerscale_value * torch.ones((dim)), requires_grad=True) + + def forward(self, x): + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + if pad_r > 0 or pad_b > 0: + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + + B, H, W, C = x.shape + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_x = x + + x_windows_all = [shifted_x] + x_window_masks_all = [self.attn_mask] + + if self.focal_level > 1 and self.pool_method != "none": + # if we add coarser granularity and the pool method is not none + for k in range(self.focal_level-1): + window_size_glo = math.floor(self.window_size_glo / (2 ** k)) + pooled_h = math.ceil(H / self.window_size) * (2 ** k) + pooled_w = math.ceil(W / self.window_size) * (2 ** k) + H_pool = pooled_h * window_size_glo + W_pool = pooled_w * window_size_glo + + x_level_k = shifted_x + # trim or pad shifted_x depending on the required size + if H > H_pool: + trim_t = (H - H_pool) // 2 + trim_b = H - H_pool - trim_t + x_level_k = x_level_k[:, trim_t:-trim_b] + elif H < H_pool: + pad_t = (H_pool - H) // 2 + pad_b = H_pool - H - pad_t + x_level_k = F.pad(x_level_k, (0,0,0,0,pad_t,pad_b)) + + if W > W_pool: + trim_l = (W - W_pool) // 2 + trim_r = W - W_pool - trim_l + x_level_k = x_level_k[:, :, trim_l:-trim_r] + elif W < W_pool: + pad_l = (W_pool - W) // 2 + pad_r = W_pool - W - pad_l + x_level_k = F.pad(x_level_k, (0,0,pad_l,pad_r)) + + x_windows_noreshape = window_partition_noreshape(x_level_k.contiguous(), window_size_glo) # B, nw, nw, window_size, window_size, C + nWh, nWw = x_windows_noreshape.shape[1:3] + if self.pool_method == "mean": + x_windows_pooled = x_windows_noreshape.mean([3, 4]) # B, nWh, nWw, C + elif self.pool_method == "max": + x_windows_pooled = x_windows_noreshape.max(-2)[0].max(-2)[0].view(B, nWh, nWw, C) # B, nWh, nWw, C + elif self.pool_method == "fc": + x_windows_noreshape = x_windows_noreshape.view(B, nWh, nWw, 
window_size_glo*window_size_glo, C).transpose(3, 4) # B, nWh, nWw, C, wsize**2 + x_windows_pooled = self.pool_layers[k](x_windows_noreshape).flatten(-2) # B, nWh, nWw, C + elif self.pool_method == "conv": + x_windows_noreshape = x_windows_noreshape.view(-1, window_size_glo, window_size_glo, C).permute(0, 3, 1, 2).contiguous() # B * nw * nw, C, wsize, wsize + x_windows_pooled = self.pool_layers[k](x_windows_noreshape).view(B, nWh, nWw, C) # B, nWh, nWw, C + + x_windows_all += [x_windows_pooled] + x_window_masks_all += [None] + + attn_windows = self.attn(x_windows_all, mask_all=x_window_masks_all) # nW*B, window_size*window_size, C + + attn_windows = attn_windows[:, :self.window_size ** 2] + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + x = x[:, :self.input_resolution[0], :self.input_resolution[1]].contiguous().view(B, -1, C) + + # FFN + x = shortcut + self.drop_path(x if (not self.use_layerscale) else (self.gamma_1 * x)) + x = x + self.drop_path(self.mlp(self.norm2(x)) if (not self.use_layerscale) else (self.gamma_2 * self.mlp(self.norm2(x)))) + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size, self.window_size, self.focal_window) + + if self.pool_method != "none" and self.focal_level > 1: + for k in range(self.focal_level-1): + window_size_glo = math.floor(self.window_size_glo / (2 ** k)) + nW_glo = nW * (2**k) + # (sub)-window pooling + flops += nW_glo * self.dim * window_size_glo * window_size_glo + # qkv for global levels + # NOTE: in our implementation, we pass the pooled window embedding to qkv embedding layer, + # but theoritically, we only need to compute k and v. 
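+                # The factor of 3 below therefore also counts the unused q projection,
+                # matching the shared qkv layer that forward() actually applies to the
+                # pooled windows.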
+ flops += nW_glo * self.dim * 3 * self.dim + + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + +def window_partition_noreshape(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (B, num_windows_h, num_windows_w, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous() + return windows + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + +def get_roll_masks(H, W, window_size, shift_size): + ##################################### + # move to top-left + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, H-window_size), + slice(H-window_size, H-shift_size), + slice(H-shift_size, H)) + w_slices = (slice(0, W-window_size), + slice(W-window_size, W-shift_size), + slice(W-shift_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_tl = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + #################################### + # move to top right + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, H-window_size), + slice(H-window_size, H-shift_size), + slice(H-shift_size, H)) + w_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_tr = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + #################################### + # move to bottom left 
+ img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, H)) + w_slices = (slice(0, W-window_size), + slice(W-window_size, W-shift_size), + slice(W-shift_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_bl = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + #################################### + # move to bottom right + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, H)) + w_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_br = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + # append all + attn_mask_all = torch.cat((attn_mask_tl, attn_mask_tr, attn_mask_bl, attn_mask_br), -1) + return attn_mask_all + +def get_relative_position_index(q_windows, k_windows): + """ + Args: + q_windows: tuple (query_window_height, query_window_width) + k_windows: tuple (key_window_height, key_window_width) + Returns: + relative_position_index: query_window_height*query_window_width, key_window_height*key_window_width + """ + # get pair-wise relative position index for each token inside the window + coords_h_q = torch.arange(q_windows[0]) + coords_w_q = torch.arange(q_windows[1]) + coords_q = torch.stack(torch.meshgrid([coords_h_q, coords_w_q])) # 2, Wh_q, Ww_q + + coords_h_k = torch.arange(k_windows[0]) + coords_w_k = torch.arange(k_windows[1]) + coords_k = torch.stack(torch.meshgrid([coords_h_k, coords_w_k])) # 2, Wh, Ww + + coords_flatten_q = torch.flatten(coords_q, 1) # 2, Wh_q*Ww_q + coords_flatten_k = torch.flatten(coords_k, 1) # 2, Wh_k*Ww_k + + relative_coords = coords_flatten_q[:, :, None] - coords_flatten_k[:, None, :] # 2, Wh_q*Ww_q, Wh_k*Ww_k + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh_q*Ww_q, Wh_k*Ww_k, 2 + relative_coords[:, :, 0] += k_windows[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += k_windows[1] - 1 + relative_coords[:, :, 0] *= (q_windows[1] + k_windows[1]) - 1 + relative_position_index = relative_coords.sum(-1) # Wh_q*Ww_q, Wh_k*Ww_k + return relative_position_index + +class WindowAttention(nn.Module): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + Args: + dim (int): Number of input channels. + expand_size (int): The expand size at focal level 1. + window_size (tuple[int]): The height and width of the window. + focal_window (int): Focal region size. + focal_level (int): Focal attention level. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. 
Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + pool_method (str): window pooling method. Default: none + """ + + def __init__(self, dim, expand_size, window_size, focal_window, focal_level, num_heads, + qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0., pool_method="none"): + + super().__init__() + self.dim = dim + self.expand_size = expand_size + self.window_size = window_size # Wh, Ww + self.pool_method = pool_method + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + self.focal_level = focal_level + self.focal_window = focal_window + + # define a parameter table of relative position bias for each window + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + if self.expand_size > 0 and focal_level > 0: + # define a parameter table of position bias between window and its fine-grained surroundings + self.window_size_of_key = self.window_size[0] * self.window_size[1] if self.expand_size == 0 else \ + (4 * self.window_size[0] * self.window_size[1] - 4 * (self.window_size[0] - self.expand_size) * (self.window_size[0] - self.expand_size)) + self.relative_position_bias_table_to_neighbors = nn.Parameter( + torch.zeros(1, num_heads, self.window_size[0] * self.window_size[1], self.window_size_of_key)) # Wh*Ww, nH, nSurrounding + trunc_normal_(self.relative_position_bias_table_to_neighbors, std=.02) + + # get mask for rolled k and rolled v + mask_tl = torch.ones(self.window_size[0], self.window_size[1]); mask_tl[:-self.expand_size, :-self.expand_size] = 0 + mask_tr = torch.ones(self.window_size[0], self.window_size[1]); mask_tr[:-self.expand_size, self.expand_size:] = 0 + mask_bl = torch.ones(self.window_size[0], self.window_size[1]); mask_bl[self.expand_size:, :-self.expand_size] = 0 + mask_br = torch.ones(self.window_size[0], self.window_size[1]); mask_br[self.expand_size:, self.expand_size:] = 0 + mask_rolled = torch.stack((mask_tl, mask_tr, mask_bl, mask_br), 0).flatten(0) + self.register_buffer("valid_ind_rolled", mask_rolled.nonzero().view(-1)) + + if pool_method != "none" and focal_level > 1: + self.relative_position_bias_table_to_windows = nn.ParameterList() + self.unfolds = nn.ModuleList() + + # build relative position bias between local patch and pooled windows + for k in range(focal_level-1): + stride = 2**k + kernel_size = 2*(self.focal_window // 2) + 2**k + (2**k-1) + # define unfolding operations + self.unfolds += [nn.Unfold( + kernel_size=(kernel_size, kernel_size), + stride=stride, padding=kernel_size // 2) + ] + + # define relative position bias table + relative_position_bias_table_to_windows 
= nn.Parameter( + torch.zeros( + self.num_heads, + (self.window_size[0] + self.focal_window + 2**k - 2) * (self.window_size[1] + self.focal_window + 2**k - 2), + ) + ) + trunc_normal_(relative_position_bias_table_to_windows, std=.02) + self.relative_position_bias_table_to_windows.append(relative_position_bias_table_to_windows) + + # define relative position bias index + relative_position_index_k = get_relative_position_index(self.window_size, to_2tuple(self.focal_window + 2**k - 1)) + self.register_buffer("relative_position_index_{}".format(k), relative_position_index_k) + + # define unfolding index for focal_level > 0 + if k > 0: + mask = torch.zeros(kernel_size, kernel_size); mask[(2**k)-1:, (2**k)-1:] = 1 + self.register_buffer("valid_ind_unfold_{}".format(k), mask.flatten(0).nonzero().view(-1)) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x_all, mask_all=None): + """ + Args: + x_all (list[Tensors]): input features at different granularity + mask_all (list[Tensors/None]): masks for input features at different granularity + """ + x = x_all[0] # + B, nH, nW, C = x.shape + qkv = self.qkv(x).reshape(B, nH, nW, 3, C).permute(3, 0, 1, 2, 4).contiguous() + q, k, v = qkv[0], qkv[1], qkv[2] # B, nH, nW, C + + # partition q map + (q_windows, k_windows, v_windows) = map( + lambda t: window_partition(t, self.window_size[0]).view( + -1, self.window_size[0] * self.window_size[0], self.num_heads, C // self.num_heads + ).transpose(1, 2), + (q, k, v) + ) + + if self.expand_size > 0 and self.focal_level > 0: + (k_tl, v_tl) = map( + lambda t: torch.roll(t, shifts=(-self.expand_size, -self.expand_size), dims=(1, 2)), (k, v) + ) + (k_tr, v_tr) = map( + lambda t: torch.roll(t, shifts=(-self.expand_size, self.expand_size), dims=(1, 2)), (k, v) + ) + (k_bl, v_bl) = map( + lambda t: torch.roll(t, shifts=(self.expand_size, -self.expand_size), dims=(1, 2)), (k, v) + ) + (k_br, v_br) = map( + lambda t: torch.roll(t, shifts=(self.expand_size, self.expand_size), dims=(1, 2)), (k, v) + ) + + (k_tl_windows, k_tr_windows, k_bl_windows, k_br_windows) = map( + lambda t: window_partition(t, self.window_size[0]).view(-1, self.window_size[0] * self.window_size[0], self.num_heads, C // self.num_heads), + (k_tl, k_tr, k_bl, k_br) + ) + (v_tl_windows, v_tr_windows, v_bl_windows, v_br_windows) = map( + lambda t: window_partition(t, self.window_size[0]).view(-1, self.window_size[0] * self.window_size[0], self.num_heads, C // self.num_heads), + (v_tl, v_tr, v_bl, v_br) + ) + k_rolled = torch.cat((k_tl_windows, k_tr_windows, k_bl_windows, k_br_windows), 1).transpose(1, 2) + v_rolled = torch.cat((v_tl_windows, v_tr_windows, v_bl_windows, v_br_windows), 1).transpose(1, 2) + + # mask out tokens in current window + k_rolled = k_rolled[:, :, self.valid_ind_rolled] + v_rolled = v_rolled[:, :, self.valid_ind_rolled] + k_rolled = torch.cat((k_windows, k_rolled), 2) + v_rolled = torch.cat((v_windows, v_rolled), 2) + else: + k_rolled = k_windows; v_rolled = v_windows; + if self.pool_method != "none" and self.focal_level > 1: + k_pooled = [] + v_pooled = [] + for k in range(self.focal_level-1): + stride = 2**k + x_window_pooled = x_all[k+1] # B, nWh, nWw, C + nWh, nWw = x_window_pooled.shape[1:3] + + # generate mask for pooled windows + mask = x_window_pooled.new(nWh, nWw).fill_(1) + unfolded_mask = self.unfolds[k](mask.unsqueeze(0).unsqueeze(1)).view( + 1, 1, 
self.unfolds[k].kernel_size[0], self.unfolds[k].kernel_size[1], -1).permute(0, 4, 2, 3, 1).contiguous().\ + view(nWh*nWw // stride // stride, -1, 1) + + if k > 0: + valid_ind_unfold_k = getattr(self, "valid_ind_unfold_{}".format(k)) + unfolded_mask = unfolded_mask[:, valid_ind_unfold_k] + + x_window_masks = unfolded_mask.flatten(1).unsqueeze(0) + x_window_masks = x_window_masks.masked_fill(x_window_masks == 0, float(-100.0)).masked_fill(x_window_masks > 0, float(0.0)) + mask_all[k+1] = x_window_masks + + # generate k and v for pooled windows + qkv_pooled = self.qkv(x_window_pooled).reshape(B, nWh, nWw, 3, C).permute(3, 0, 4, 1, 2).contiguous() + k_pooled_k, v_pooled_k = qkv_pooled[1], qkv_pooled[2] # B, C, nWh, nWw + + + (k_pooled_k, v_pooled_k) = map( + lambda t: self.unfolds[k](t).view( + B, C, self.unfolds[k].kernel_size[0], self.unfolds[k].kernel_size[1], -1).permute(0, 4, 2, 3, 1).contiguous().\ + view(-1, self.unfolds[k].kernel_size[0]*self.unfolds[k].kernel_size[1], self.num_heads, C // self.num_heads).transpose(1, 2), + (k_pooled_k, v_pooled_k) # (B x (nH*nW)) x nHeads x (unfold_wsize x unfold_wsize) x head_dim + ) + + if k > 0: + (k_pooled_k, v_pooled_k) = map( + lambda t: t[:, :, valid_ind_unfold_k], (k_pooled_k, v_pooled_k) + ) + + k_pooled += [k_pooled_k] + v_pooled += [v_pooled_k] + k_all = torch.cat([k_rolled] + k_pooled, 2) + v_all = torch.cat([v_rolled] + v_pooled, 2) + else: + k_all = k_rolled + v_all = v_rolled + + N = k_all.shape[-2] + q_windows = q_windows * self.scale + attn = (q_windows @ k_all.transpose(-2, -1)) # B*nW, nHead, window_size*window_size, focal_window_size*focal_window_size + + window_area = self.window_size[0] * self.window_size[1] + window_area_rolled = k_rolled.shape[2] + + # add relative position bias for tokens inside window + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn[:, :, :window_area, :window_area] = attn[:, :, :window_area, :window_area] + relative_position_bias.unsqueeze(0) + + # add relative position bias for patches inside a window + if self.expand_size > 0 and self.focal_level > 0: + attn[:, :, :window_area, window_area:window_area_rolled] = attn[:, :, :window_area, window_area:window_area_rolled] + self.relative_position_bias_table_to_neighbors + + if self.pool_method != "none" and self.focal_level > 1: + # add relative position bias for different windows in an image + offset = window_area_rolled + for k in range(self.focal_level-1): + # add relative position bias + relative_position_index_k = getattr(self, 'relative_position_index_{}'.format(k)) + relative_position_bias_to_windows = self.relative_position_bias_table_to_windows[k][:, relative_position_index_k.view(-1)].view( + -1, self.window_size[0] * self.window_size[1], (self.focal_window+2**k-1)**2, + ) # nH, NWh*NWw,focal_region*focal_region + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] = \ + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] + relative_position_bias_to_windows.unsqueeze(0) + # add attentional mask + if mask_all[k+1] is not None: + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] = \ + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] + \ + mask_all[k+1][:, :, None, None, :].repeat(attn.shape[0] 
// mask_all[k+1].shape[1], 1, 1, 1, 1).view(-1, 1, 1, mask_all[k+1].shape[-1]) + + offset += (self.focal_window+2**k-1)**2 + + if mask_all[0] is not None: + nW = mask_all[0].shape[0] + attn = attn.view(attn.shape[0] // nW, nW, self.num_heads, window_area, N) + attn[:, :, :, :, :window_area] = attn[:, :, :, :, :window_area] + mask_all[0][None, :, None, :, :] + attn = attn.view(-1, self.num_heads, window_area, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v_all).transpose(1, 2).reshape(attn.shape[0], window_area, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' + + def flops(self, N, window_size, unfold_size): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + if self.pool_method != "none" and self.focal_level > 1: + flops += self.num_heads * N * (self.dim // self.num_heads) * (unfold_size * unfold_size) + if self.expand_size > 0 and self.focal_level > 0: + flops += self.num_heads * N * (self.dim // self.num_heads) * ((window_size + 2*self.expand_size)**2-window_size**2) + + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + if self.pool_method != "none" and self.focal_level > 1: + flops += self.num_heads * N * (self.dim // self.num_heads) * (unfold_size * unfold_size) + if self.expand_size > 0 and self.focal_level > 0: + flops += self.num_heads * N * (self.dim // self.num_heads) * ((window_size + 2*self.expand_size)**2-window_size**2) + + # x = self.proj(x) + flops += N * self.dim * self.dim + return diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/MaskEncoding.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/MaskEncoding.py new file mode 100644 index 0000000000000000000000000000000000000000..412602709dcedf9014fbd271dfd191230f7a0fa0 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/MaskEncoding.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn + +VALUE_MAX = 0.05 +VALUE_MIN = 0.01 + + +@torch.no_grad() +class PCAMaskEncoding(nn.Module): + """ + To do the mask encoding of PCA. + components_: (tensor), shape (n_components, n_features) if agnostic=True + else (n_samples, n_components, n_features) + explained_variance_: Variance explained by each of the selected components. + (tensor), shape (n_components) if agnostic=True + else (n_samples, n_components) + mean_: (tensor), shape (n_features) if agnostic=True + else (n_samples, n_features) + agnostic: (bool), whether class_agnostic or class_specific. + whiten : (bool), optional + When True (False by default) the ``components_`` vectors are divided + by ``n_samples`` times ``components_`` to ensure uncorrelated outputs + with unit component-wise variances. + Whitening will remove some information from the transformed signal + (the relative variance scales of the components) but can sometimes + improve the predictive accuracy of the downstream estimators by + making data respect some hard-wired assumptions. + sigmoid: (bool) whether to apply inverse sigmoid before transform. 
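+
+    encoder() projects flattened ``mask_size**2`` mask vectors onto the stored components to
+    produce ``dim_mask``-dimensional codes; decoder() reverses that projection and, at
+    inference time, maps the reconstruction back through a sigmoid (or clamps it).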
+ """ + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.agnostic = True #cfg.MODEL.SWINTS.AGNOSTIC + self.whiten = True #cfg.MODEL.SWINTS.WHITEN + self.sigmoid = True #cfg.MODEL.SWINTS.SIGMOID + self.dim_mask = cfg.MODEL.SWINTS.MASK_DIM + self.mask_size = 28 #cfg.MODEL.SWINTS.MASK_SIZE + + if self.agnostic: + self.components = nn.Parameter(torch.zeros(self.dim_mask, self.mask_size**2), requires_grad=False) + self.explained_variances = nn.Parameter(torch.zeros(self.dim_mask), requires_grad=False) + self.means = nn.Parameter(torch.zeros(self.mask_size**2), requires_grad=False) + else: + raise NotImplementedError + + def inverse_sigmoid(self, x): + """Apply the inverse sigmoid operation. + y = -ln(1-x/x) + """ + # In case of overflow + value_random = VALUE_MAX * torch.rand_like(x) + value_random = torch.where(value_random > VALUE_MIN, value_random, VALUE_MIN * torch.ones_like(x)) + x = torch.where(x > value_random, 1 - value_random, value_random) + # inverse sigmoid + y = -1 * torch.log((1 - x) / x) + return y + + def encoder(self, X): + """Apply dimensionality reduction to X. + X is projected on the first principal components previously extracted + from a training set. + Parameters + ---------- + X : Original features(tensor), shape (n_samples, n_features) + New data, where n_samples is the number of samples + and n_features is the number of features. + + Returns + ------- + X_transformed : Transformed features(tensor), shape (n_samples, n_features) + """ + assert X.shape[1] == self.mask_size**2, print("The original mask_size of input" + " should be equal to the supposed size.") + + if self.sigmoid: + X = self.inverse_sigmoid(X) + + if self.agnostic: + if self.means is not None: + X_transformed = X - self.means + X_transformed = torch.matmul(X_transformed, self.components.T) + if self.whiten: + X_transformed /= torch.sqrt(self.explained_variances) + else: + # TODO: The class-specific version has not implemented. + raise NotImplementedError + + return X_transformed + + def decoder(self, X, is_train=False): + """Transform data back to its original space. + In other words, return an input X_original whose transform would be X. + Parameters + ---------- + X : Encoded features(tensor), shape (n_samples, n_components) + New data, where n_samples is the number of samples + and n_components is the number of components. + + Returns + ------- + X_original original features(tensor), shape (n_samples, n_features) + """ + assert X.shape[1] == self.dim_mask, print("The dim of transformed data " + "should be equal to the supposed dim.") + + if self.agnostic: + if self.whiten: + components_ = self.components * torch.sqrt(self.explained_variances.unsqueeze(1)) + X_transformed = torch.matmul(X, components_) + if self.means is not None: + X_transformed = X_transformed + self.means + else: + # TODO: The class-specific version has not implemented. 
+ raise NotImplementedError + + if is_train: + pass + else: + if self.sigmoid: + X_transformed = torch.sigmoid(X_transformed) + else: + X_transformed = torch.clamp(X_transformed, min=0.01, max=0.99) + + return X_transformed diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db28125d64eea1bc95504d276182989a7452d688 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/__init__.py @@ -0,0 +1,3 @@ +from .config import add_SWINTS_config +from .swints import SWINTS +from .dataset_mapper import SWINTSDatasetMapper diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/beam_search.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/beam_search.py new file mode 100644 index 0000000000000000000000000000000000000000..7e39f0223cfb3c6752c091a77da83812afaeee30 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/beam_search.py @@ -0,0 +1,95 @@ +import torch +from .topk import TopK + +class BeamNode(object): + def __init__(self, seq, state, score): + self.seq = seq + self.state = state + self.score = score + self.avg_score = score / len(seq) + + def __cmp__(self, other): + if self.avg_score == other.avg_score: + return 0 + elif self.avg_score < other.avg_score: + return -1 + else: + return 1 + + def __lt__(self, other): + return self.avg_score < other.avg_score + + def __eq__(self, other): + return self.avg_score == other.avg_score + +class BeamSearch(object): + """Class to generate sequences from an image-to-text model.""" + + def __init__(self, + decode_step, + eos, + beam_size=2, + max_seq_len=32): + self.decode_step = decode_step + self.eos = eos + self.beam_size = beam_size + self.max_seq_len = max_seq_len + + def beam_search(self, init_inputs, init_states): + # self.beam_size = 1 + batch_size = len(init_inputs) + part_seqs = [TopK(self.beam_size) for _ in range(batch_size)] + comp_seqs = [TopK(self.beam_size) for _ in range(batch_size)] + + # print(init_inputs.shape, init_states.shape) + words, scores, states = self.decode_step(init_inputs, init_states, k=self.beam_size) + for batch_id in range(batch_size): + for i in range(self.beam_size): + node = BeamNode([words[batch_id][i]], states[:, :, batch_id, :], scores[batch_id][i]) + part_seqs[batch_id].push(node) + + for t in range(self.max_seq_len - 1): + part_seq_list = [] + for p in part_seqs: + part_seq_list.append(p.extract()) + p.reset() + + inputs, states = [], [] + for seq_list in part_seq_list: + for node in seq_list: + inputs.append(node.seq[-1]) + states.append(node.state) + if len(inputs) == 0: + break + + inputs = torch.stack(inputs) + states = torch.stack(states, dim=2) + words, scores, states = self.decode_step(inputs, states, k=self.beam_size + 1) + + idx = 0 + for batch_id in range(batch_size): + for node in part_seq_list[batch_id]: + tmp_state = states[:, :, idx, :] + k = 0 + num_hyp = 0 + while num_hyp < self.beam_size: + word = words[idx][k] + tmp_seq = node.seq + [word] + tmp_score = node.score + scores[idx][k] + tmp_node = BeamNode(tmp_seq, tmp_state, tmp_score) + k += 1 + num_hyp += 1 + + if word == self.eos: + comp_seqs[batch_id].push(tmp_node) + num_hyp -= 1 + else: + part_seqs[batch_id].push(tmp_node) + idx += 1 + + for batch_id in range(batch_size): + if not comp_seqs[batch_id].size(): + comp_seqs[batch_id] = 
part_seqs[batch_id] + seqs = [seq_list.extract(sort=True)[0].seq for seq_list in comp_seqs] + seq_scores = [seq_list.extract(sort=True)[0].avg_score for seq_list in comp_seqs] + return seqs, seq_scores \ No newline at end of file diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/config.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/config.py new file mode 100644 index 0000000000000000000000000000000000000000..7b72b42444671a0670107e93eaafb78db2a9c815 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/config.py @@ -0,0 +1,80 @@ +from detectron2.config import CfgNode as CN + + +def add_SWINTS_config(cfg): + """ + Add config for SWINTS. + """ + cfg.MODEL.SWINTS = CN() + cfg.MODEL.SWINTS.NUM_CLASSES = 80 + cfg.MODEL.SWINTS.NUM_PROPOSALS = 300 + cfg.MODEL.SWINTS.TEST_NUM_PROPOSALS = 100 + + # RCNN Head. + cfg.MODEL.SWINTS.NHEADS = 8 + cfg.MODEL.SWINTS.DROPOUT = 0.0 + cfg.MODEL.SWINTS.DIM_FEEDFORWARD = 2048 + cfg.MODEL.SWINTS.ACTIVATION = 'relu' + cfg.MODEL.SWINTS.HIDDEN_DIM = 256 + cfg.MODEL.SWINTS.NUM_CLS = 3 + cfg.MODEL.SWINTS.NUM_REG = 3 + cfg.MODEL.SWINTS.NUM_MASK = 3 + cfg.MODEL.SWINTS.NUM_HEADS = 6 + + cfg.MODEL.SWINTS.MASK_DIM = 60 + + + # Dynamic Conv. + cfg.MODEL.SWINTS.NUM_DYNAMIC = 2 + cfg.MODEL.SWINTS.DIM_DYNAMIC = 64 + + # Recognition Head + cfg.MODEL.REC_HEAD = CN() + cfg.MODEL.REC_HEAD.BATCH_SIZE = 48 + cfg.MODEL.REC_HEAD.POOLER_RESOLUTION = (28,28) + cfg.MODEL.REC_HEAD.RESOLUTION = (32, 32) + cfg.MODEL.REC_HEAD.NUM_CLASSES = 107 + + # Loss. + cfg.MODEL.SWINTS.CLASS_WEIGHT = 2.0 + cfg.MODEL.SWINTS.GIOU_WEIGHT = 2.0 + cfg.MODEL.SWINTS.L1_WEIGHT = 5.0 + cfg.MODEL.SWINTS.REC_WEIGHT = 1.0 + cfg.MODEL.SWINTS.DEEP_SUPERVISION = True + cfg.MODEL.SWINTS.NO_OBJECT_WEIGHT = 0.1 + cfg.MODEL.SWINTS.MASK_WEIGHT = 2.0 + + # Focal Loss. + cfg.MODEL.SWINTS.ALPHA = 0.25 + cfg.MODEL.SWINTS.GAMMA = 2.0 + cfg.MODEL.SWINTS.PRIOR_PROB = 0.01 + + # Optimizer. 
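+    # AdamW by default; BACKBONE_MULTIPLIER is presumably applied as a learning-rate
+    # multiplier for the backbone parameters wherever the optimizer is built.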
+ cfg.SOLVER.OPTIMIZER = "ADAMW" + cfg.SOLVER.BACKBONE_MULTIPLIER = 1.0 + + # Matcher + cfg.MODEL.SWINTS.IOU_THRESHOLDS = [0.5] + cfg.MODEL.SWINTS.IOU_LABELS = [0, 1] + + # Encoder + cfg.MODEL.SWINTS.PATH_COMPONENTS = "./src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz" + + # SWINT backbone + cfg.MODEL.SWINT = CN() + cfg.MODEL.SWINT.EMBED_DIM = 96 + cfg.MODEL.SWINT.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] + cfg.MODEL.SWINT.DEPTHS = [2, 2, 6, 2] + cfg.MODEL.SWINT.NUM_HEADS = [3, 6, 12, 24] + cfg.MODEL.SWINT.WINDOW_SIZE = 7 + cfg.MODEL.SWINT.MLP_RATIO = 4 + cfg.MODEL.SWINT.DROP_PATH_RATE = 0.2 + cfg.MODEL.SWINT.APE = False + cfg.MODEL.BACKBONE.FREEZE_AT = -1 + + # addation + cfg.MODEL.FPN.TOP_LEVELS = 2 + + # Test config + cfg.TEST.USE_NMS_IN_TSET = True + cfg.TEST.INFERENCE_TH_TEST = 0.4 \ No newline at end of file diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/dataset_mapper.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..f8d6af13294c86fb5e1dc21c3680b1e9595f87aa --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/dataset_mapper.py @@ -0,0 +1,142 @@ +import copy +import logging + +import numpy as np +import torch + +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.data.transforms import TransformGen +from detectron2.structures import BoxMode +from PIL import Image + +__all__ = ["SWINTSDatasetMapper"] + + +def build_transform_gen(cfg, is_train): + """ + Create a list of :class:`TransformGen` from config. + Returns: + list[TransformGen] + """ + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + sample_style = "choice" + if sample_style == "range": + assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format(len(min_size)) + + logger = logging.getLogger(__name__) + tfm_gens = [] + tfm_gens.append(T.RandomBrightness(0.5,2)) + tfm_gens.append(T.RandomContrast(0.5,2)) + tfm_gens.append(T.RandomSaturation(0.5,2)) + tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) + if is_train: + logger.info("TransformGens used in training: " + str(tfm_gens)) + return tfm_gens + +@torch.no_grad() +class SWINTSDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by SparseRCNN. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. 
Prepare image and annotation to Tensors + """ + + def __init__(self, cfg, is_train=True): + if cfg.INPUT.CROP.ENABLED and is_train: + self.crop_gen = [ + #T.ResizeShortestEdge([400, 500, 600], sample_style="choice"), + #T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE), + T.RandomCropWithInstance( + cfg.INPUT.CROP.TYPE, + cfg.INPUT.CROP.SIZE, + cfg.INPUT.CROP.CROP_INSTANCE + ) + ] + self.rotate_gen = [ + T.RandomRotation(angle=[-90,90],sample_style="range") + ] + else: + self.crop_gen = None + self.tfm_gens = build_transform_gen(cfg, is_train) + logging.getLogger(__name__).info( + "Full TransformGens used in training: {}, crop: {}".format(str(self.tfm_gens), str(self.crop_gen)) + ) + + self.img_format = cfg.INPUT.FORMAT + self.is_train = is_train + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + boxes = np.asarray( + [ + BoxMode.convert( + instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS + ) + for instance in dataset_dict["annotations"] + ] + ) + augmentation = [] + if self.crop_gen is None: + image, transforms = T.apply_transform_gens(self.tfm_gens, image) + else: + if np.random.rand() > 0.5: + augmentation = self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:] + else: + augmentation = self.tfm_gens + if np.random.rand() > 0.5: + augmentation = augmentation[:-1] + self.rotate_gen + augmentation[-1:] + aug_input = T.StandardAugInput(image, boxes=boxes) + transforms = aug_input.apply_augmentations(augmentation) + image = aug_input.image + + image_shape = image.shape[:2] # h, w + # print(image_shape) + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + return dataset_dict + + if "annotations" in dataset_dict: + # USER: Modify this if you want to keep them for some reason. 
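+            # Keypoints are dropped below, while segmentation is deliberately kept (its pop
+            # is commented out) because the masks are still used as supervision later on.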
+ for anno in dataset_dict["annotations"]: + # anno.pop("segmentation", None) + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations(obj, transforms, image_shape) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances(annos, image_shape) + dataset_dict["instances"] = utils.filter_empty_instances(instances) + return dataset_dict diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/head.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/head.py new file mode 100644 index 0000000000000000000000000000000000000000..7540b076dc21797f0e8af321c2e0800a57e2342e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/head.py @@ -0,0 +1,493 @@ +import copy +import math +from typing import Optional, List + +import torch +from torch import nn, Tensor +import torch.nn.functional as F + +from detectron2.modeling.poolers import ROIPooler, cat +from detectron2.structures import Boxes, pairwise_iou + +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, get_norm + +from detectron2.modeling.matcher import Matcher +from .rec_stage import REC_STAGE + +_DEFAULT_SCALE_CLAMP = math.log(100000.0 / 16) + +def _get_src_permutation_idx(indices): +# permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + +def _get_tgt_permutation_idx(indices): +# permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + +class DynamicHead(nn.Module): + + def __init__(self, cfg, roi_input_shape): + super().__init__() + + # Build RoI. + box_pooler = self._init_box_pooler(cfg, roi_input_shape) + self.box_pooler = box_pooler + box_pooler_rec = self._init_box_pooler_rec(cfg, roi_input_shape) + self.box_pooler_rec = box_pooler_rec + + # Build heads. 
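+        # A single RCNNHead is instantiated and cloned NUM_HEADS times into self.head_series
+        # for iterative refinement; a separate REC_STAGE (built further down) handles the
+        # text-recognition branch.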
+ num_classes = cfg.MODEL.SWINTS.NUM_CLASSES + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + dim_feedforward = cfg.MODEL.SWINTS.DIM_FEEDFORWARD + nhead = cfg.MODEL.SWINTS.NHEADS + dropout = cfg.MODEL.SWINTS.DROPOUT + activation = cfg.MODEL.SWINTS.ACTIVATION + self.train_num_proposal = cfg.MODEL.SWINTS.NUM_PROPOSALS + self.num_heads = cfg.MODEL.SWINTS.NUM_HEADS + rcnn_head = RCNNHead(cfg, self.hidden_dim, num_classes, dim_feedforward, nhead, dropout, activation) + self.head_series = _get_clones(rcnn_head, self.num_heads) + self.return_intermediate = cfg.MODEL.SWINTS.DEEP_SUPERVISION + + self.cfg =cfg + + # Build recognition heads + self.rec_stage = REC_STAGE(cfg, self.hidden_dim, num_classes, dim_feedforward, nhead, dropout, activation) + self.cnn = nn.Sequential( + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,1), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,1), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + ) + + #DC + self.conv = nn.ModuleList([ + nn.Sequential( + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,2,2), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,4,4), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,1), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True),) + for i in range(4) + ] + ) + + + # Init parameters. + self.num_classes = num_classes + prior_prob = cfg.MODEL.SWINTS.PRIOR_PROB + self.bias_value = -math.log((1 - prior_prob) / prior_prob) + self._reset_parameters() + + def _reset_parameters(self): + # init all parameters. + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + # initialize the bias for focal loss. + if p.shape[-1] == self.num_classes: + nn.init.constant_(p, self.bias_value) + + @staticmethod + def _init_box_pooler(cfg, input_shape): + + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + return box_pooler + @staticmethod + def _init_box_pooler_rec(cfg, input_shape): + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.REC_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales= pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + return box_pooler + + def extra_rec_feat(self, 
matcher, mask_encoding, targets, N, bboxes, class_logits, pred_bboxes, mask_logits, proposal_features, features): + gt_masks = list() + gt_boxes = list() + proposal_boxes_pred = list() + masks_pred = list() + pred_mask = mask_logits.detach() + + N, nr_boxes = bboxes.shape[:2] + if targets: + output = {'pred_logits': class_logits, 'pred_boxes': pred_bboxes, 'pred_masks': mask_logits} + indices = matcher(output, targets, mask_encoding) + idx = _get_src_permutation_idx(indices) + target_rec = torch.cat([t['rec'][i] for t, (_, i) in zip(targets, indices)], dim=0) + target_rec = target_rec.repeat(2,1) + else: + idx = None + scores = torch.sigmoid(class_logits) + labels = torch.arange(2, device=bboxes.device).\ + unsqueeze(0).repeat(self.train_num_proposal, 1).flatten(0, 1) + inter_class_logits = [] + inter_pred_bboxes = [] + inter_pred_masks = [] + inter_pred_label = [] + for b in range(N): + if targets: + gt_boxes.append(Boxes(targets[b]['boxes_xyxy'][indices[b][1]])) + gt_masks.append(targets[b]['gt_masks'][indices[b][1]]) + proposal_boxes_pred.append(Boxes(bboxes[b][indices[b][0]])) + tmp_mask = mask_encoding.decoder(pred_mask[b]).view(-1,28,28) + tmp_mask = tmp_mask[indices[b][0]] + tmp_mask2 = torch.full_like(tmp_mask,0).cuda() + tmp_mask2[tmp_mask>0.4]=1 + masks_pred.append(tmp_mask2) + else: + # post_processing + num_proposals = self.cfg.MODEL.SWINTS.TEST_NUM_PROPOSALS + scores_per_image, topk_indices = scores[b].flatten(0, 1).topk(num_proposals, sorted=False) + labels_per_image = labels[topk_indices] + box_pred_per_image = bboxes[b].view(-1, 1, 4).repeat(1, 2, 1).view(-1, 4) + box_pred_per_image = box_pred_per_image[topk_indices] + mask_pred_per_image = mask_logits.view(-1, self.cfg.MODEL.SWINTS.MASK_DIM) + mask_pred_per_image = mask_encoding.decoder(mask_pred_per_image, is_train=False) + mask_pred_per_image = mask_pred_per_image.view(-1, 1, 28, 28) + n, c, w, h = mask_pred_per_image.size() + mask_pred_per_image = torch.repeat_interleave(mask_pred_per_image,2,1).view(-1, c, w, h) + mask_pred_per_image = mask_pred_per_image[topk_indices] + proposal_features = proposal_features[b].view(-1, 1, self.hidden_dim).repeat(1, 2, 1).view(-1, self.hidden_dim) + proposal_features = proposal_features[topk_indices] + proposal_boxes_pred.append(Boxes(box_pred_per_image)) + gt_masks.append(mask_pred_per_image) + inter_class_logits.append(scores_per_image) + inter_pred_bboxes.append(box_pred_per_image) + inter_pred_masks.append(mask_pred_per_image) + inter_pred_label.append(labels_per_image) + + # get recognition roi region + if targets: + gt_roi_features = self.box_pooler_rec(features, gt_boxes) + pred_roi_features = self.box_pooler_rec(features, proposal_boxes_pred) + masks_pred = torch.cat(masks_pred).cuda() + gt_masks = torch.cat(gt_masks).cuda() + rec_map = torch.cat((gt_roi_features,pred_roi_features),0) + gt_masks = torch.cat((gt_masks,masks_pred),0) + else: + rec_map = self.box_pooler_rec(features, proposal_boxes_pred) + gt_masks = torch.cat(gt_masks).cuda() + nr_boxes = rec_map.shape[0] + if targets: + rec_map = rec_map[:self.cfg.MODEL.REC_HEAD.BATCH_SIZE] + else: + gt_masks_b = torch.full_like(gt_masks,0).cuda() + gt_masks_b[gt_masks>0.4]=1 + gt_masks_b = gt_masks_b.squeeze() + gt_masks = gt_masks_b + del gt_masks_b + if targets: + return proposal_features, gt_masks[:self.cfg.MODEL.REC_HEAD.BATCH_SIZE], idx, rec_map, target_rec[:self.cfg.MODEL.REC_HEAD.BATCH_SIZE] + else: + return inter_class_logits, inter_pred_bboxes, inter_pred_masks, inter_pred_label, proposal_features, gt_masks, idx, 
rec_map, nr_boxes + + def forward(self, features, init_bboxes, init_features, targets = None, mask_encoding = None, matcher=None): + + inter_class_logits = [] + inter_pred_bboxes = [] + inter_pred_masks = [] + inter_pred_label = [] + + bs = len(features[0]) + bboxes = init_bboxes + proposal_features = init_features.clone() + for i_idx in range(len(features)): + features[i_idx] = self.conv[i_idx](features[i_idx]) + features[i_idx] + for i, rcnn_head in enumerate(self.head_series): + + class_logits, pred_bboxes, proposal_features, mask_logits = rcnn_head(features, bboxes, proposal_features, self.box_pooler) + if self.return_intermediate: + inter_class_logits.append(class_logits) + inter_pred_bboxes.append(pred_bboxes) + inter_pred_masks.append(mask_logits) + bboxes = pred_bboxes.detach() + + # extract recognition feature. + N, nr_boxes = bboxes.shape[:2] + if targets: + proposal_features, gt_masks, idx, rec_map, target_rec = \ + self.extra_rec_feat(matcher, mask_encoding, targets, N, bboxes, class_logits, pred_bboxes, mask_logits, proposal_features, features) + else: + inter_class_logits, inter_pred_bboxes, inter_pred_masks, inter_pred_label, proposal_features, gt_masks, idx, rec_map, nr_boxes = \ + self.extra_rec_feat(matcher, mask_encoding, targets, N, bboxes, class_logits, pred_bboxes, mask_logits, proposal_features, features) + + rec_map = self.cnn(rec_map) + rec_proposal_features = proposal_features.clone() + + if targets: + rec_result = self.rec_stage(rec_map, rec_proposal_features, gt_masks, N, nr_boxes, idx, target_rec) + else: + rec_result = self.rec_stage(rec_map, rec_proposal_features, gt_masks, N, nr_boxes) + rec_result = torch.tensor(rec_result) + if self.return_intermediate: + return torch.stack(inter_class_logits), torch.stack(inter_pred_bboxes), torch.stack(inter_pred_masks), rec_result + return class_logits[None], pred_bboxes[None], mask_logits[None] + + +class RCNNHead(nn.Module): + + def __init__(self, cfg, d_model, num_classes, dim_feedforward=2048, nhead=8, dropout=0.1, activation="relu", + scale_clamp: float = _DEFAULT_SCALE_CLAMP, bbox_weights=(2.0, 2.0, 1.0, 1.0)): + super().__init__() + + self.d_model = d_model + + # dynamic. + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.inst_interact = DynamicConv(cfg) + + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + + self.activation = nn.ELU(inplace=True) + + # cls. + num_cls = cfg.MODEL.SWINTS.NUM_CLS + cls_module = list() + for _ in range(num_cls): + cls_module.append(nn.Linear(d_model, d_model, False)) + cls_module.append(nn.LayerNorm(d_model)) + cls_module.append(nn.ELU(inplace=True)) + self.cls_module = nn.ModuleList(cls_module) + + # reg. + num_reg = cfg.MODEL.SWINTS.NUM_REG + reg_module = list() + for _ in range(num_reg): + reg_module.append(nn.Linear(d_model, d_model, False)) + reg_module.append(nn.LayerNorm(d_model)) + reg_module.append(nn.ELU(inplace=True)) + self.reg_module = nn.ModuleList(reg_module) + + # mask. 
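# Sketch of the shared "tower" pattern used for the cls/reg modules above and
# the mask module just below: a configurable number of
# (Linear without bias -> LayerNorm -> ELU) blocks applied in sequence to the
# fused (N * nr_boxes, d_model) feature. A minimal standalone version:
import torch
from torch import nn

def make_tower(d_model: int, num_blocks: int) -> nn.ModuleList:
    layers = []
    for _ in range(num_blocks):
        layers.append(nn.Linear(d_model, d_model, bias=False))
        layers.append(nn.LayerNorm(d_model))
        layers.append(nn.ELU(inplace=True))
    return nn.ModuleList(layers)

tower = make_tower(256, num_blocks=1)
x = torch.randn(4, 256)          # stand-in for (N * nr_boxes, d_model)
for layer in tower:
    x = layer(x)
print(x.shape)                   # torch.Size([4, 256])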
+ num_mask = cfg.MODEL.SWINTS.NUM_MASK + mask_module = list() + for _ in range(num_mask): + mask_module.append(nn.Linear(d_model, d_model, False)) + mask_module.append(nn.LayerNorm(d_model)) + mask_module.append(nn.ELU(inplace=True)) + self.mask_module = nn.ModuleList(mask_module) + self.mask_logits = nn.Linear(d_model, cfg.MODEL.SWINTS.MASK_DIM) + + # pred. + self.class_logits = nn.Linear(d_model, num_classes) + self.bboxes_delta = nn.Linear(d_model, 4) + self.scale_clamp = scale_clamp + self.bbox_weights = bbox_weights + + + def forward(self, features, bboxes, pro_features, pooler): + """ + :param bboxes: (N, nr_boxes, 4) + :param pro_features: (N, nr_boxes, d_model) + """ + + N, nr_boxes = bboxes.shape[:2] + + # roi_feature. + proposal_boxes = list() + for b in range(N): + proposal_boxes.append(Boxes(bboxes[b])) + roi_features = pooler(features, proposal_boxes) + roi_features = roi_features.view(N * nr_boxes, self.d_model, -1).permute(2, 0, 1) + + # self_att. + pro_features = pro_features.view(N, nr_boxes, self.d_model).permute(1, 0, 2) + pro_features2 = self.self_attn(pro_features, pro_features, value=pro_features)[0] + pro_features = pro_features + self.dropout1(pro_features2) + + del pro_features2 + + pro_features = self.norm1(pro_features) + + # inst_interact. + pro_features = pro_features.view(nr_boxes, N, self.d_model).permute(1, 0, 2).reshape(1, N * nr_boxes, self.d_model) + pro_features2 = self.inst_interact(pro_features, roi_features) + pro_features = pro_features + self.dropout2(pro_features2) + + del pro_features2 + + obj_features = self.norm2(pro_features) + + # obj_feature. + obj_features2 = self.linear2(self.dropout(self.activation(self.linear1(obj_features)))) + obj_features = obj_features + self.dropout3(obj_features2) + + del obj_features2 + + obj_features = self.norm3(obj_features) + + fc_feature = obj_features.transpose(0, 1).reshape(N * nr_boxes, -1) + cls_feature = fc_feature.clone() + reg_feature = fc_feature.clone() + + mask_feature = fc_feature.clone() + + del fc_feature + + for mask_layer in self.mask_module: + mask_feature = mask_layer(mask_feature) + mask_logits = self.mask_logits(mask_feature) + del mask_feature + + for cls_layer in self.cls_module: + cls_feature = cls_layer(cls_feature) + for reg_layer in self.reg_module: + reg_feature = reg_layer(reg_feature) + class_logits = self.class_logits(cls_feature) + bboxes_deltas = self.bboxes_delta(reg_feature) + + del cls_feature + del reg_feature + + pred_bboxes = self.apply_deltas(bboxes_deltas, bboxes.view(-1, 4)) + + return class_logits.view(N, nr_boxes, -1), pred_bboxes.view(N, nr_boxes, -1), obj_features, mask_logits.view(N, nr_boxes, -1) + + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. + deltas[i] represents k potentially different class-specific + box transformations for the single box boxes[i]. 
+ boxes (Tensor): boxes to transform, of shape (N, 4) + """ + boxes = boxes.to(deltas.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.bbox_weights + dx = deltas[:, 0::4] / wx + dy = deltas[:, 1::4] / wy + dw = deltas[:, 2::4] / ww + dh = deltas[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1 + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2 + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2 + + return pred_boxes + + +class DynamicConv(nn.Module): + + def __init__(self, cfg): + super().__init__() + + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + self.dim_dynamic = cfg.MODEL.SWINTS.DIM_DYNAMIC + self.num_dynamic = cfg.MODEL.SWINTS.NUM_DYNAMIC + self.num_params = self.hidden_dim * self.dim_dynamic + self.dynamic_layer = nn.Linear(self.hidden_dim, self.num_dynamic * self.num_params) + + self.norm1 = nn.LayerNorm(self.dim_dynamic) + self.norm2 = nn.LayerNorm(self.hidden_dim) + + self.activation = nn.ELU(inplace=True) + + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + num_output = self.hidden_dim * pooler_resolution ** 2 + self.out_layer = nn.Linear(num_output, self.hidden_dim) + self.norm3 = nn.LayerNorm(self.hidden_dim) + + def forward(self, pro_features, roi_features): + ''' + pro_features: (1, N * nr_boxes, self.d_model) + roi_features: (49, N * nr_boxes, self.d_model) + ''' + features = roi_features.permute(1, 0, 2) + parameters = self.dynamic_layer(pro_features).permute(1, 0, 2) + + param1 = parameters[:, :, :self.num_params].view(-1, self.hidden_dim, self.dim_dynamic) + param2 = parameters[:, :, self.num_params:].view(-1, self.dim_dynamic, self.hidden_dim) + + del parameters + + features = torch.bmm(features, param1) + + del param1 + + features = self.norm1(features) + features = self.activation(features) + + features = torch.bmm(features, param2) + + del param2 + + features = self.norm2(features) + features = self.activation(features) + + features = features.flatten(1) + features = self.out_layer(features) + features = self.norm3(features) + features = self.activation(features) + + return features + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/loss.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..a0edff08e5a2a825e01a1ef7fc6d995cb98c210a --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/loss.py @@ -0,0 +1,227 @@ +import torch +import torch.nn.functional as F +from torch import nn +from fvcore.nn import sigmoid_focal_loss_jit + +from .util import box_ops +from .util.misc import (NestedTensor, nested_tensor_from_tensor_list, + accuracy, get_world_size, interpolate, + is_dist_avail_and_initialized) +from .util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou + +from scipy.optimize import 
linear_sum_assignment + + +class SetCriterion(nn.Module): + def __init__(self, cfg, num_classes, matcher, weight_dict, eos_coef, losses): + super().__init__() + self.cfg = cfg + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.eos_coef = eos_coef + self.losses = losses + self.cfg = cfg + + self.focal_loss_alpha = cfg.MODEL.SWINTS.ALPHA + self.focal_loss_gamma = cfg.MODEL.SWINTS.GAMMA + + def loss_labels(self, outputs, targets, indices, num_boxes, mask_encoding): + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + src_logits = src_logits.flatten(0, 1) + + target_classes = target_classes.flatten(0, 1) + pos_inds = torch.nonzero(target_classes != self.num_classes, as_tuple=True)[0] + labels = torch.zeros_like(src_logits) + labels[pos_inds, target_classes[pos_inds]] = 1 + + class_loss = sigmoid_focal_loss_jit( + src_logits, + labels, + alpha=self.focal_loss_alpha, + gamma=self.focal_loss_gamma, + reduction="sum", + ) / num_boxes + losses = {'loss_ce': class_loss} + + return losses + + + def loss_boxes(self, outputs, targets, indices, num_boxes, mask_encoding): + assert 'pred_boxes' in outputs + idx = self._get_src_permutation_idx(indices) + src_boxes = outputs['pred_boxes'][idx] + target_boxes = torch.cat([t['boxes_xyxy'][i] for t, (_, i) in zip(targets, indices)], dim=0) + + losses = {} + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou(src_boxes, target_boxes)) + losses['loss_giou'] = loss_giou.sum() / num_boxes + + image_size = torch.cat([v["image_size_xyxy_tgt"] for v in targets]) + src_boxes_ = src_boxes / image_size + target_boxes_ = target_boxes / image_size + + loss_bbox = F.l1_loss(src_boxes_, target_boxes_, reduction='none') + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + return losses + + def loss_masks(self, outputs, targets, indices, num_boxes, mask_encoding): + assert 'pred_masks' in outputs + idx = self._get_src_permutation_idx(indices) + src_masks_feat = outputs['pred_masks'][idx] + target_masks = torch.cat([t['gt_masks'][i] for t, (_, i) in zip(targets, indices)], dim=0) + mask_loss_func = nn.MSELoss(reduction="none") + + target_masks_feat = mask_encoding.encoder(target_masks.flatten(1)) + loss = mask_loss_func(src_masks_feat, target_masks_feat) + + losses = {} + losses['loss_feat'] = loss.sum() / num_boxes / self.cfg.MODEL.SWINTS.MASK_DIM + + eps = 1e-5 + src_masks = mask_encoding.decoder(src_masks_feat.flatten(1)) + n_inst = src_masks.size(0) + target_masks = target_masks.flatten(1) + intersection = (src_masks * target_masks).sum(dim=1) + union = (src_masks ** 2.0).sum(dim=1) + (target_masks ** 2.0).sum(dim=1) + eps + loss = 1. 
- (2 * intersection / union) + losses['loss_dice'] = loss.sum() / num_boxes + + return losses + def loss_rec(self, outputs, targets, indices, num_boxes, mask_encoding): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + src_rec = outputs['pred_rec'] + losses = {} + losses['loss_rec'] = src_rec + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + def get_loss(self, loss, outputs, targets, indices, num_boxes, mask_encoding, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'boxes': self.loss_boxes, + 'masks': self.loss_masks, + 'rec': self.loss_rec + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, targets, indices, num_boxes, mask_encoding, **kwargs) + + def forward(self, outputs, targets, mask_encoding): + outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'} + + # Retrieve the matching between the outputs of the last layer and the targets + indices = self.matcher(outputs_without_aux, targets, mask_encoding) + + # Compute the average number of target boxes accross all nodes, for normalization purposes + num_boxes = sum(len(t["labels"]) for t in targets) + num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + + # Compute all the requested losses + losses = {} + for loss in self.losses: + losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes, mask_encoding)) + + # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. + if 'aux_outputs' in outputs: + for i, aux_outputs in enumerate(outputs['aux_outputs']): + indices = self.matcher(aux_outputs, targets, mask_encoding) + for loss in self.losses: + # if loss == 'masks': + # # Intermediate masks losses are too costly to compute, we ignore them. 
+ # continue + if loss == 'rec': + continue + kwargs = {} + l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, mask_encoding, **kwargs) + l_dict = {k + f'_{i}': v for k, v in l_dict.items()} + losses.update(l_dict) + + return losses + + + +class HungarianMatcher(nn.Module): + def __init__(self, cfg, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1, cost_mask: float = 1): + super().__init__() + self.cost_class = cost_class + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + self.cost_mask = cost_mask + self.focal_loss_alpha = cfg.MODEL.SWINTS.ALPHA + self.focal_loss_gamma = cfg.MODEL.SWINTS.GAMMA + assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs cant be 0" + + @torch.no_grad() + def forward(self, outputs, targets, mask_encoding): + bs, num_queries = outputs["pred_logits"].shape[:2] + + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() # [batch_size * num_queries, num_classes] + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + # Also concat the target labels and boxes + tgt_ids = torch.cat([v["labels"] for v in targets]) + tgt_bbox = torch.cat([v["boxes_xyxy"] for v in targets]) + + + alpha = self.focal_loss_alpha + gamma = self.focal_loss_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Compute the L1 cost between boxes + image_size_out = torch.cat([v["image_size_xyxy"].unsqueeze(0) for v in targets]) + image_size_out = image_size_out.unsqueeze(1).repeat(1, num_queries, 1).flatten(0, 1) + image_size_tgt = torch.cat([v["image_size_xyxy_tgt"] for v in targets]) + + out_bbox_ = out_bbox / image_size_out + tgt_bbox_ = tgt_bbox / image_size_tgt + cost_bbox = torch.cdist(out_bbox_, tgt_bbox_, p=1) + + # Compute the giou cost betwen boxes + # cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)) + cost_giou = -generalized_box_iou(out_bbox, tgt_bbox) + + # mask loss + tgt_mask = torch.cat([v["gt_masks"] for v in targets]).flatten(1) + tgt_mask_feat = mask_encoding.encoder(tgt_mask) + out_mask_feat = outputs["pred_masks"].flatten(0, 1).flatten(1) + + tgt_mask_feat = nn.functional.normalize(tgt_mask_feat, p=2) + out_mask_feat = nn.functional.normalize(out_mask_feat, p=2) + + # cost_mask = -torch.mm(out_mask, tgt_mask.T) + cost_mask = -(torch.mm(out_mask_feat, tgt_mask_feat.T) + 1.0) / 2.0 + + # Final cost matrix + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + self.cost_mask * cost_mask + C = C.view(bs, num_queries, -1).cpu() + + sizes = [len(v["boxes"]) for v in targets] + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/rec_stage.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/rec_stage.py new file mode 100644 index 0000000000000000000000000000000000000000..66ad1c23f652e5bdbdf16aa2e73eecabd5650237 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/rec_stage.py @@ -0,0 +1,207 @@ +import torch +from torch import nn, Tensor +from .FocalTransformer import FocalTransformerBlock +from .transformer import PositionalEncoding +from 
.roi_seq_predictors import SequencePredictor + +class DynamicConv_v2(nn.Module): + + def __init__(self, cfg): + super().__init__() + + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + self.dim_dynamic = cfg.MODEL.SWINTS.DIM_DYNAMIC + self.num_dynamic = cfg.MODEL.SWINTS.NUM_DYNAMIC + self.num_params = self.hidden_dim * self.dim_dynamic + self.dynamic_layer = nn.Linear(self.hidden_dim, self.num_dynamic * self.num_params) + + + self.norm1 = nn.LayerNorm(self.dim_dynamic) + self.norm2 = nn.LayerNorm(self.hidden_dim) + + self.activation = nn.ELU(inplace=True) + + def forward(self, pro_features, roi_features): + ''' + pro_features: (1, N * nr_boxes, self.d_model) + roi_features: (rec_resolution, N * nr_boxes, self.d_model) + ''' + features = roi_features.permute(1, 0, 2) + parameters = self.dynamic_layer(pro_features).permute(1, 0, 2) + + param1 = parameters[:, :, :self.num_params].view(-1, self.hidden_dim, self.dim_dynamic) + param2 = parameters[:, :, self.num_params:].view(-1, self.dim_dynamic, self.hidden_dim) + del parameters + + features = torch.bmm(features, param1) + + del param1 + features = self.norm1(features) + features = self.activation(features) + + features = torch.bmm(features, param2) + + del param2 + + features = self.norm2(features) + features = self.activation(features) + + return features + +class REC_STAGE(nn.Module): + + def __init__(self, cfg, d_model, num_classes, dim_feedforward=2048, nhead=8, dropout=0.2, activation="relu"): + super().__init__() + + self.d_model = d_model + + # dynamic. + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.inst_interact = DynamicConv_v2(cfg) + + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + + self.activation = nn.ELU(inplace=True) + + self.feat_size = cfg.MODEL.REC_HEAD.POOLER_RESOLUTION + self.rec_batch_size = cfg.MODEL.REC_HEAD.BATCH_SIZE + self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=4) + self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=3) + + self.TLSAM = nn.Sequential( + FocalTransformerBlock(dim=256, input_resolution=self.feat_size, num_heads=8, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.2, + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="fc", + focal_level=2, focal_window=3, use_layerscale=False, layerscale_value=1e-4), + FocalTransformerBlock(dim=256, input_resolution=self.feat_size, num_heads=8, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.2, + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="fc", + focal_level=2, focal_window=3, use_layerscale=False, layerscale_value=1e-4),FocalTransformerBlock(dim=256, input_resolution=self.feat_size, num_heads=8, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.2, + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="fc", + focal_level=2, focal_window=3, use_layerscale=False, layerscale_value=1e-4) + ) + + self.pos_encoder = PositionalEncoding(self.d_model, max_len=(self.feat_size[0]//4)*(self.feat_size[1]//4)) + num_channels = d_model 
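# Sketch: shape behaviour of the encoder_layer helper (defined at the bottom of
# this file) that k_encoder is built from below. A 3x3 conv with stride 2 and
# padding 1 halves H and W, so the two encoder layers reduce the pooled
# recognition feature by 4x before the decoder branches upsample it back.
# Sizes in the example are illustrative.
import torch
from torch import nn

def encoder_layer(in_c, out_c, k=3, s=2, p=1):
    return nn.Sequential(nn.Conv2d(in_c, out_c, k, s, p),
                         nn.BatchNorm2d(out_c),
                         nn.ReLU(True))

x = torch.randn(2, 256, 32, 32)              # (N, C, H, W)
print(encoder_layer(256, 256)(x).shape)      # torch.Size([2, 256, 16, 16])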
+ in_channels = d_model + mode = 'nearest' + self.k_encoder = nn.Sequential( + encoder_layer(num_channels, num_channels, s=(2, 2)), + encoder_layer(num_channels, num_channels, s=(2, 2)) + ) + self.k_decoder_det = nn.Sequential( + decoder_layer_worelu(num_channels, num_channels, scale_factor=2, mode=mode), + decoder_layer_worelu(num_channels, num_channels, scale_factor=2, mode=mode), + decoder_layer(num_channels, in_channels, size=(self.feat_size[0], self.feat_size[1]), mode=mode) + ) + self.k_decoder_rec = nn.Sequential( + decoder_layer(num_channels, num_channels, scale_factor=2, mode=mode), + decoder_layer(num_channels, num_channels, scale_factor=2, mode=mode), + ) + + self.seq_decoder = SequencePredictor(cfg, d_model) + self.rescale = nn.Upsample(size=(self.feat_size[0], self.feat_size[1]), mode="bilinear", align_corners=False) + + def forward(self, roi_features, pro_features, gt_masks, N, nr_boxes, idx=None, targets=None): + """ + :param bboxes: (N, nr_boxes, 4) + :param pro_features: (N, nr_boxes, d_model) + """ + features = [] + k = roi_features + for i in range(0, len(self.k_encoder)): + k = self.k_encoder[i](k) + features.append(k) + n,c,h,w = k.size() + k = k.view(n, c, -1).permute(2, 0, 1) + # self_att. + pro_features = pro_features.view(N, nr_boxes, self.d_model).permute(1, 0, 2) + pro_features2 = self.self_attn(pro_features, pro_features, value=pro_features)[0] + pro_features = pro_features + self.dropout1(pro_features2) + + del pro_features2 + + pro_features = self.norm1(pro_features) + + # # inst_interact. + if idx: + pro_features = pro_features.permute(1, 0, 2)[idx] + pro_features = pro_features.repeat(2,1)[:self.rec_batch_size] + else: + pro_features = pro_features.permute(1, 0, 2) + pro_features = pro_features.reshape(1, -1, self.d_model) + pro_features2 = self.inst_interact(pro_features, k) + pro_features = k.permute(1,0,2) + self.dropout2(pro_features2) + + del pro_features2 + + obj_features = self.norm2(pro_features) + + # # obj_feature. 
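# Sketch of the residual feed-forward step applied to obj_features just below:
# Linear -> ELU -> Dropout -> Linear, added back to the input, then LayerNorm
# (the same post-norm block RCNNHead uses in head.py). Defaults here mirror the
# REC_STAGE arguments but are assumptions of this sketch.
import torch
from torch import nn

class ResidualFFN(nn.Module):
    def __init__(self, d_model=256, dim_feedforward=2048, dropout=0.2):
        super().__init__()
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.linear2 = nn.Linear(dim_feedforward, d_model)
        self.dropout = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
        self.activation = nn.ELU(inplace=True)
        self.norm3 = nn.LayerNorm(d_model)

    def forward(self, x):
        y = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.norm3(x + self.dropout3(y))

print(ResidualFFN()(torch.randn(49, 4, 256)).shape)  # torch.Size([49, 4, 256])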
+ obj_features2 = self.linear2(self.dropout(self.activation(self.linear1(obj_features)))) + obj_features = obj_features + self.dropout3(obj_features2) + + del obj_features2 + obj_features = self.norm3(obj_features) + obj_features = obj_features.permute(1,0,2) + obj_features = self.pos_encoder(obj_features) + obj_features = self.transformer_encoder(obj_features) + obj_features = obj_features.permute(1,2,0) + n,c,w = obj_features.shape + obj_features = obj_features.view(n,c,self.feat_size[0]//4,self.feat_size[1]//4) + obj_features = obj_features + k = k.permute(1,2,0) + k = k.view(n,c,self.feat_size[0]//4,self.feat_size[1]//4) + k_rec = k*obj_features.sigmoid() + k_rec = self.k_decoder_rec[0](k_rec) + k_rec = k_rec + features[0] + + k_det = obj_features + k_det = self.k_decoder_det[0](k_det) + k_det = k_det + features[0] + k_rec = k_rec * k_det.sigmoid() + + k_rec = self.k_decoder_rec[1](k_rec) + roi_features + k_det = self.k_decoder_det[1](k_det) + roi_features + k_rec = k_rec * k_det.sigmoid() + + k_rec = self.k_decoder_det[-1](k_rec) + k_rec = k_rec.flatten(-2,-1).permute(0,2,1) + k_rec = self.TLSAM(k_rec) + k_rec = k_rec.permute(0,2,1).view(n,c,self.feat_size[0],self.feat_size[1]) + gt_masks = self.rescale(gt_masks.unsqueeze(1)) + k_rec = k_rec*gt_masks + attn_vecs = self.seq_decoder(k_rec, targets, targets) + return attn_vecs + +def encoder_layer(in_c, out_c, k=3, s=2, p=1): + return nn.Sequential(nn.Conv2d(in_c, out_c, k, s, p), + nn.BatchNorm2d(out_c), + nn.ReLU(True)) + +def decoder_layer(in_c, out_c, k=3, s=1, p=1, mode='nearest', scale_factor=None, size=None): + align_corners = None if mode=='nearest' else True + return nn.Sequential(nn.Upsample(size=size, scale_factor=scale_factor, + mode=mode, align_corners=align_corners), + nn.Conv2d(in_c, out_c, k, s, p), + nn.BatchNorm2d(out_c), + nn.ReLU(True)) + +def decoder_layer_worelu(in_c, out_c, k=3, s=1, p=1, mode='nearest', scale_factor=None, size=None): + align_corners = None if mode=='nearest' else True + return nn.Sequential(nn.Upsample(size=size, scale_factor=scale_factor, + mode=mode, align_corners=align_corners), + nn.Conv2d(in_c, in_c, k, s, p), + nn.BatchNorm2d(in_c), + nn.ReLU(True), + nn.Conv2d(in_c, out_c, k, s, p)) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/roi_seq_predictors.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/roi_seq_predictors.py new file mode 100644 index 0000000000000000000000000000000000000000..565c1c681ecadf87f9b8b0927ea1e6e7a35bac3f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/roi_seq_predictors.py @@ -0,0 +1,382 @@ +# Written by Minghui Liao +import math +import random + +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + +gpu_device = torch.device("cuda") +cpu_device = torch.device("cpu") + + +def reduce_mul(l): + out = 1.0 + for x in l: + out *= x + return out + + +def check_all_done(seqs): + for seq in seqs: + if not seq[-1]: + return False + return True + +def num2char(num): + CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','´', "~", "ˋ", "ˊ","﹒", "ˀ", "˜", "ˇ", "ˆ", "˒","‑"] + char = chars[num] + return char 
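# Sketch: turning predicted indices back into text with a lookup table in the
# spirit of CTLABELS above (num2char presumably indexes that table; the `chars`
# name it references is not defined in this file). The greedy decoder further
# below stops when it predicts index 0, which this toy helper mirrors; the
# table here is illustrative, not the real SWINTS vocabulary handling.
def decode_indices(indices, table, stop_index=0):
    out = []
    for i in indices:
        if i == stop_index:
            break
        out.append(table[i] if 0 <= i < len(table) else "?")
    return "".join(out)

print(decode_indices([2, 1, 3, 0, 2], [" ", "a", "b", "c"]))  # -> "bac"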
+ +# TODO +class SequencePredictor(nn.Module): + def __init__(self,cfg, dim_in ): + super(SequencePredictor, self).__init__() + self.seq_encoder = nn.Sequential( + nn.Conv2d(dim_in, dim_in, 3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(2, stride=2, ceil_mode=True), + ) + self.MAX_LENGTH = 100 + RESIZE_WIDTH = cfg.MODEL.REC_HEAD.RESOLUTION[1] + RESIZE_HEIGHT = cfg.MODEL.REC_HEAD.RESOLUTION[0] + self.RESIZE_WIDTH = RESIZE_WIDTH + self.RESIZE_HEIGHT = RESIZE_HEIGHT + x_onehot_size = int(RESIZE_WIDTH / 2) + y_onehot_size = int(RESIZE_HEIGHT / 2) + self.num_class = cfg.MODEL.REC_HEAD.NUM_CLASSES + self.seq_decoder = BahdanauAttnDecoderRNN( + 256, self.num_class, self.num_class, n_layers=1, dropout_p=0.1, onehot_size = (y_onehot_size, x_onehot_size) + ) + # self.criterion_seq_decoder = nn.NLLLoss(ignore_index = -1, reduce=False) + self.criterion_seq_decoder = nn.NLLLoss(ignore_index=-1, reduction="none") + # self.rescale = nn.Upsample(size=(16, 64), mode="bilinear", align_corners=False) + self.rescale = nn.Upsample(size=(RESIZE_HEIGHT, RESIZE_WIDTH), mode="bilinear", align_corners=False) + + self.x_onehot = nn.Embedding(x_onehot_size, x_onehot_size) + self.x_onehot.weight.data = torch.eye(x_onehot_size) + self.y_onehot = nn.Embedding(y_onehot_size, y_onehot_size) + self.y_onehot.weight.data = torch.eye(y_onehot_size) + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + def forward( + self, x, decoder_targets=None, word_targets=None, use_beam_search=False + ): + rescale_out = self.rescale(x) + seq_decoder_input = self.seq_encoder(rescale_out) + x_onehot_size = int(self.RESIZE_WIDTH / 2) + y_onehot_size = int(self.RESIZE_HEIGHT / 2) + x_t, y_t = np.meshgrid(np.linspace(0, x_onehot_size - 1, x_onehot_size), np.linspace(0, y_onehot_size - 1, y_onehot_size)) + x_t = torch.LongTensor(x_t, device=cpu_device).cuda() + y_t = torch.LongTensor(y_t, device=cpu_device).cuda() + x_onehot_embedding = ( + self.x_onehot(x_t) + .transpose(0, 2) + .transpose(1, 2) + .repeat(seq_decoder_input.size(0), 1, 1, 1) + ) + y_onehot_embedding = ( + self.y_onehot(y_t) + .transpose(0, 2) + .transpose(1, 2) + .repeat(seq_decoder_input.size(0), 1, 1, 1) + ) + seq_decoder_input_loc = torch.cat( + [seq_decoder_input, x_onehot_embedding, y_onehot_embedding], 1 + ) + seq_decoder_input_reshape = ( + seq_decoder_input_loc.view( + seq_decoder_input_loc.size(0), seq_decoder_input_loc.size(1), -1 + ) + .transpose(0, 2) + .transpose(1, 2) + ) + if self.training: + bos_onehot = np.zeros( + (seq_decoder_input_reshape.size(1), 1), dtype=np.int32 + ) + bos_onehot[:, 0] = 0 + decoder_input = torch.tensor(bos_onehot.tolist(), device=gpu_device) + decoder_hidden = torch.zeros( + (seq_decoder_input_reshape.size(1), 256), device=gpu_device + ) + use_teacher_forcing = ( + True + if random.random() < 1 + else False + ) + target_length = decoder_targets.size(1) + if use_teacher_forcing: + # Teacher forcing: Feed the target as the next input + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, decoder_hidden, seq_decoder_input_reshape + ) + if di == 0: + loss_seq_decoder = self.criterion_seq_decoder( + decoder_output, word_targets[:, di] + ) + else: + loss_seq_decoder += self.criterion_seq_decoder( + decoder_output, 
word_targets[:, di] + ) + decoder_input = decoder_targets[:, di] # Teacher forcing + else: + # Without teacher forcing: use its own predictions as the next input + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, decoder_hidden, seq_decoder_input_reshape + ) + topv, topi = decoder_output.topk(1) + decoder_input = topi.squeeze( + 1 + ).detach() # detach from history as input + if di == 0: + loss_seq_decoder = self.criterion_seq_decoder( + decoder_output, word_targets[:, di] + ) + else: + loss_seq_decoder += self.criterion_seq_decoder( + decoder_output, word_targets[:, di] + ) + loss_seq_decoder = loss_seq_decoder.sum() / loss_seq_decoder.size(0) + loss_seq_decoder = 0.2 * loss_seq_decoder + return loss_seq_decoder + else: + words = [] + decoded_scores = [] + detailed_decoded_scores = [] + # real_length = 0 + if use_beam_search: + for batch_index in range(seq_decoder_input_reshape.size(1)): + decoder_hidden = torch.zeros((1, 256), device=gpu_device) + word = [] + char_scores = [] + detailed_char_scores = [] + top_seqs = self.beam_search( + seq_decoder_input_reshape[:, batch_index : batch_index + 1, :], + decoder_hidden, + beam_size=6, + max_len=self.MAX_LENGTH, + ) + top_seq = top_seqs[0] + for character in top_seq[1:]: + character_index = character[0] + if character_index == self.cfg.SEQUENCE.NUM_CHAR: + char_scores.append(character[1]) + detailed_char_scores.append(character[2]) + break + else: + if character_index == 0: + word.append("~") + char_scores.append(0.0) + else: + word.append(num2char(character_index)) + char_scores.append(character[1]) + detailed_char_scores.append(character[2]) + words.append("".join(word)) + decoded_scores.append(char_scores) + detailed_decoded_scores.append(detailed_char_scores) + else: + for batch_index in range(seq_decoder_input_reshape.size(1)): + bos_onehot = np.zeros((1, 1), dtype=np.int32) + bos_onehot[:, 0] = 0 + decoder_input = torch.tensor(bos_onehot.tolist(), device=gpu_device) + decoder_hidden = torch.zeros((1, 256), device=gpu_device) + word = [] + char_scores = [] + for di in range(self.MAX_LENGTH): + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, + decoder_hidden, + seq_decoder_input_reshape[ + :, batch_index : batch_index + 1, : + ], + ) + # decoder_attentions[di] = decoder_attention.data + topv, topi = decoder_output.data.topk(1) + char_scores.append(topv.item()) + if topi.item() == 0: + break + else: + if topi.item() == 0: + word.append(topi.item()) + else: + word.append(topi.item()) + + # real_length = di + decoder_input = topi.squeeze(1).detach() + tmp = np.zeros((self.MAX_LENGTH), dtype=np.int32) + tmp[:len(word)] = torch.tensor(word) + word = tmp + words.append(word) + decoded_scores.append(char_scores) + return words + + def beam_search_step(self, encoder_context, top_seqs, k): + all_seqs = [] + for seq in top_seqs: + seq_score = reduce_mul([_score for _, _score, _, _ in seq]) + if seq[-1][0] == self.cfg.SEQUENCE.NUM_CHAR - 1: + all_seqs.append((seq, seq_score, seq[-1][2], True)) + continue + decoder_hidden = seq[-1][-1][0] + onehot = np.zeros((1, 1), dtype=np.int32) + onehot[:, 0] = seq[-1][0] + decoder_input = torch.tensor(onehot.tolist(), device=gpu_device) + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, decoder_hidden, encoder_context + ) + detailed_char_scores = decoder_output.cpu().numpy() + # print(decoder_output.shape) + scores, candidates = decoder_output.data[:, 1:].topk(k) + 
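# Note on the line above: slicing decoder_output.data[:, 1:] drops column 0
# (index 0 is used as the BOS token here) before topk, so the positions in
# `candidates` are shifted down by one relative to the vocabulary; the loop
# below adds 1 back when recording character indices. Toy illustration
# (probability values assumed):
#   probs = torch.tensor([[0.05, 0.2, 0.7, 0.05]])
#   probs[:, 1:].topk(2) -> values (0.7, 0.2) at positions (1, 0),
#   i.e. vocabulary indices 2 and 1 after the +1 correction.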
for i in range(k): + character_score = scores[:, i] + character_index = candidates[:, i] + score = seq_score * character_score.item() + char_score = seq_score * detailed_char_scores + rs_seq = seq + [ + ( + character_index.item() + 1, + character_score.item(), + char_score, + [decoder_hidden], + ) + ] + done = character_index.item() + 1 == 38 + all_seqs.append((rs_seq, score, char_score, done)) + all_seqs = sorted(all_seqs, key=lambda seq: seq[1], reverse=True) + topk_seqs = [seq for seq, _, _, _ in all_seqs[:k]] + all_done = check_all_done(all_seqs[:k]) + return topk_seqs, all_done + + def beam_search(self, encoder_context, decoder_hidden, beam_size=6, max_len=32): + char_score = np.zeros(self.cfg.SEQUENCE.NUM_CHAR) + top_seqs = [[(self.cfg.SEQUENCE.BOS_TOKEN, 1.0, char_score, [decoder_hidden])]] + # loop + for _ in range(max_len): + top_seqs, all_done = self.beam_search_step( + encoder_context, top_seqs, beam_size + ) + if all_done: + break + return top_seqs + + +class Attn(nn.Module): + def __init__(self, method, hidden_size, embed_size, onehot_size): + super(Attn, self).__init__() + self.method = method + self.hidden_size = hidden_size + self.embed_size = embed_size + self.attn = nn.Linear(2 * self.hidden_size + onehot_size, hidden_size) + # self.attn = nn.Linear(hidden_size, hidden_size) + self.v = nn.Parameter(torch.rand(hidden_size)) + stdv = 1.0 / math.sqrt(self.v.size(0)) + self.v.data.normal_(mean=0, std=stdv) + + def forward(self, hidden, encoder_outputs): + """ + :param hidden: + previous hidden state of the decoder, in shape (B, hidden_size) + :param encoder_outputs: + encoder outputs from Encoder, in shape (H*W, B, hidden_size) + :return + attention energies in shape (B, H*W) + """ + max_len = encoder_outputs.size(0) + # this_batch_size = encoder_outputs.size(1) + H = hidden.repeat(max_len, 1, 1).transpose(0, 1) # (B, H*W, hidden_size) + encoder_outputs = encoder_outputs.transpose(0, 1) # (B, H*W, hidden_size) + attn_energies = self.score( + H, encoder_outputs + ) # compute attention score (B, H*W) + return F.softmax(attn_energies, dim=1).unsqueeze( + 1 + ) # normalize with softmax (B, 1, H*W) + + def score(self, hidden, encoder_outputs): + energy = torch.tanh( + self.attn(torch.cat([hidden, encoder_outputs], 2)) + ) # (B, H*W, 2*hidden_size+H+W)->(B, H*W, hidden_size) + energy = energy.transpose(2, 1) # (B, hidden_size, H*W) + v = self.v.repeat(encoder_outputs.data.shape[0], 1).unsqueeze( + 1 + ) # (B, 1, hidden_size) + energy = torch.bmm(v, energy) # (B, 1, H*W) + return energy.squeeze(1) # (B, H*W) + + +class BahdanauAttnDecoderRNN(nn.Module): + def __init__( + self, + hidden_size, + embed_size, + output_size, + n_layers=1, + dropout_p=0, + bidirectional=False, + onehot_size = (8, 32) + ): + super(BahdanauAttnDecoderRNN, self).__init__() + # Define parameters + self.hidden_size = hidden_size + self.embed_size = embed_size + self.output_size = output_size + self.n_layers = n_layers + self.dropout_p = dropout_p + # Define layers + self.embedding = nn.Embedding(output_size, embed_size) + self.embedding.weight.data = torch.eye(embed_size) + # self.dropout = nn.Dropout(dropout_p) + self.word_linear = nn.Linear(embed_size, hidden_size) + self.attn = Attn("concat", hidden_size, embed_size, onehot_size[0] + onehot_size[1]) + self.rnn = nn.GRUCell(2 * hidden_size + onehot_size[0] + onehot_size[1], hidden_size) + self.out = nn.Linear(hidden_size, output_size) + + def forward(self, word_input, last_hidden, encoder_outputs): + """ + :param word_input: + word input for current time 
step, in shape (B) + :param last_hidden: + last hidden stat of the decoder, in shape (layers*direction*B, hidden_size) + :param encoder_outputs: + encoder outputs in shape (H*W, B, C) + :return + decoder output + """ + # Get the embedding of the current input word (last output word) + word_embedded_onehot = self.embedding(word_input).view( + 1, word_input.size(0), -1 + ) # (1,B,embed_size) + word_embedded = self.word_linear(word_embedded_onehot) # (1, B, hidden_size) + attn_weights = self.attn(last_hidden, encoder_outputs) # (B, 1, H*W) + context = attn_weights.bmm( + encoder_outputs.transpose(0, 1) + ) # (B, 1, H*W) * (B, H*W, C) = (B,1,C) + context = context.transpose(0, 1) # (1,B,C) + # Combine embedded input word and attended context, run through RNN + # 2 * hidden_size + W + H: 256 + 256 + 32 + 8 = 552 + rnn_input = torch.cat((word_embedded, context), 2) + last_hidden = last_hidden.view(last_hidden.size(0), -1) + rnn_input = rnn_input.view(word_input.size(0), -1) + hidden = self.rnn(rnn_input, last_hidden) + if not self.training: + output = F.softmax(self.out(hidden), dim=1) + else: + output = F.log_softmax(self.out(hidden), dim=1) + # Return final output, hidden state + # print(output.shape) + return output, hidden, attn_weights + + +def make_roi_seq_predictor(cfg, dim_in): + return SequencePredictor(cfg, dim_in) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/swints.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/swints.py new file mode 100644 index 0000000000000000000000000000000000000000..180cf949523a0d6a703d008d03b0200e1be6caa3 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/swints.py @@ -0,0 +1,285 @@ +import logging +import math +from typing import List + +import numpy as np +import torch +import torch.distributed as dist +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import ShapeSpec +from detectron2.modeling import META_ARCH_REGISTRY, build_backbone, detector_postprocess +from detectron2.modeling.roi_heads import build_roi_heads + +from detectron2.structures import Boxes, ImageList, Instances +from detectron2.utils.logger import log_first_n +from fvcore.nn import giou_loss, smooth_l1_loss + +from .loss import SetCriterion, HungarianMatcher +from .head import DynamicHead +from .util.box_ops import box_cxcywh_to_xyxy, box_xyxy_to_cxcywh +from .util.misc import (NestedTensor, nested_tensor_from_tensor_list, + accuracy, get_world_size, interpolate, + is_dist_avail_and_initialized) + +from detectron2.layers import Conv2d, get_norm +from .MaskEncoding import PCAMaskEncoding +from detectron2.modeling.backbone import PatchEmbed + +__all__ = ["SWINTS"] + + +class ImgFeatExtractor(nn.Module): + def __init__(self, cfg): + super().__init__() + # self.img_feat_layer = nn.AdaptiveAvgPool2d(1) + self.cfg = cfg + + def forward(self, features): + for i, f in enumerate(features): + if i == 0: + x = torch.mean(torch.mean(f, -1), -1) #self.img_feat_layer(f) + else: + x_p = torch.mean(torch.mean(f, -1), -1) #self.img_feat_layer(f) + x = x + x_p + + img_feats = x.squeeze(-1).squeeze(-1).unsqueeze(1).repeat(1, self.cfg.MODEL.SWINTS.NUM_PROPOSALS, 1,) + + del x_p + del x + + return img_feats + + +@META_ARCH_REGISTRY.register() +class SWINTS(nn.Module): + + def __init__(self, cfg): + super().__init__() + + self.cfg = cfg + + self.device = torch.device(cfg.MODEL.DEVICE) + + self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + self.num_classes = 
cfg.MODEL.SWINTS.NUM_CLASSES + self.num_proposals = cfg.MODEL.SWINTS.NUM_PROPOSALS + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + self.num_heads = cfg.MODEL.SWINTS.NUM_HEADS + + # Build Backbone. + self.backbone = build_backbone(cfg) + self.size_divisibility = self.backbone.size_divisibility + + # Build Proposals. + self.pos_embeddings = nn.Embedding(self.num_proposals, self.hidden_dim) + self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4) + nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5) + nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0) + + # -------- + self.IFE = ImgFeatExtractor(cfg) + self.mask_encoding = PCAMaskEncoding(cfg) + # encoding parameters. + components_path = cfg.MODEL.SWINTS.PATH_COMPONENTS + # update parameters. + parameters = np.load(components_path) + components = nn.Parameter(torch.from_numpy(parameters['components_c'][0]).float().to(self.device),requires_grad=False) + explained_variances = nn.Parameter(torch.from_numpy(parameters['explained_variance_c'][0]).float().to(self.device), requires_grad=False) + means = nn.Parameter(torch.from_numpy(parameters['mean_c'][0]).float().to(self.device),requires_grad=False) + self.mask_encoding.components = components + self.mask_encoding.explained_variances = explained_variances + self.mask_encoding.means = means + + # Build Dynamic Head. + self.head = DynamicHead(cfg=cfg, roi_input_shape=self.backbone.output_shape()) + + # Loss parameters: + class_weight = cfg.MODEL.SWINTS.CLASS_WEIGHT + giou_weight = cfg.MODEL.SWINTS.GIOU_WEIGHT + l1_weight = cfg.MODEL.SWINTS.L1_WEIGHT + rec_weight = cfg.MODEL.SWINTS.REC_WEIGHT + no_object_weight = cfg.MODEL.SWINTS.NO_OBJECT_WEIGHT + mask_weight = cfg.MODEL.SWINTS.MASK_WEIGHT + + self.deep_supervision = cfg.MODEL.SWINTS.DEEP_SUPERVISION + + # Build Criterion. + matcher = HungarianMatcher(cfg=cfg, + cost_class=class_weight, + cost_bbox=l1_weight, + cost_giou=giou_weight, + cost_mask=mask_weight) + self.matcher = matcher + weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight, "loss_feat": mask_weight, "loss_dice": mask_weight} + if self.deep_supervision: + aux_weight_dict = {} + for i in range(self.num_heads - 1): + aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) + weight_dict.update(aux_weight_dict) + weight_dict["loss_rec"] = rec_weight + losses = ["labels", "boxes", "masks", "rec"] + + self.criterion = SetCriterion(cfg=cfg, + num_classes=self.num_classes, + matcher=matcher, + weight_dict=weight_dict, + eos_coef=no_object_weight, + losses=losses) + + pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1) + pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1) + self.normalizer = lambda x: (x - pixel_mean) / pixel_std + self.to(self.device) + + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances: Instances + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + """ + images, images_whwh = self.preprocess_image(batched_inputs) + if isinstance(images, (list, torch.Tensor)): + images = nested_tensor_from_tensor_list(images) + + # Feature Extraction. 
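# Sketch: how the learnable proposal boxes initialised in __init__ above
# (constant (0.5, 0.5, 1.0, 1.0) in normalized cxcywh) become absolute
# full-image boxes a few lines below, after box_cxcywh_to_xyxy and scaling by
# the per-image (w, h, w, h) tensor from preprocess_image. The helper here
# mirrors util.box_ops for a self-contained example.
import torch

def cxcywh_to_xyxy(b):
    cx, cy, w, h = b.unbind(-1)
    return torch.stack((cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h), dim=-1)

proposals = torch.tensor([[0.5, 0.5, 1.0, 1.0]])             # one learned proposal
images_whwh = torch.tensor([[640.0, 480.0, 640.0, 480.0]])   # one image, W=640, H=480
print(cxcywh_to_xyxy(proposals)[None] * images_whwh[:, None, :])
# tensor([[[  0.,   0., 640., 480.]]])  -> each proposal starts as the full image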
+ src = self.backbone(images.tensor) + + features = list() + for f in self.in_features: + feature = src[f] + features.append(feature) + + # Prepare Proposals. + proposal_boxes = self.init_proposal_boxes.weight.clone() + proposal_boxes = box_cxcywh_to_xyxy(proposal_boxes) + proposal_boxes = proposal_boxes[None] * images_whwh[:, None, :] + + img_feats = self.IFE(features) + bs = len(features[0]) + pos_embeddings = self.pos_embeddings.weight[None].repeat(bs, 1, 1) + proposal_feats = img_feats + pos_embeddings + + del img_feats + if self.training: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + targets = self.prepare_targets(gt_instances) + outputs_class, outputs_coord, outputs_mask,out_rec = self.head(features, proposal_boxes, proposal_feats, targets, mask_encoding=self.mask_encoding, matcher=self.matcher) + output = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'pred_masks': outputs_mask[-1], 'pred_rec': out_rec} + if self.deep_supervision: + output['aux_outputs'] = [{'pred_logits': a, 'pred_boxes': b, 'pred_masks': c} + for a, b, c in zip(outputs_class[:-1], outputs_coord[:-1], outputs_mask[:-1])] + + loss_dict = self.criterion(output, targets, self.mask_encoding) + weight_dict = self.criterion.weight_dict + for k in loss_dict.keys(): + if k in weight_dict: + loss_dict[k] *= weight_dict[k] + return loss_dict + + else: + outputs_class, outputs_coord, outputs_mask,out_rec = self.head(features, proposal_boxes, proposal_feats, mask_encoding=self.mask_encoding) + output = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'pred_masks': outputs_mask[-1]} + box_cls = output["pred_logits"] + box_pred = output["pred_boxes"] + mask_pred = output["pred_masks"].unsqueeze(dim=2) + results = Instances(images.image_sizes[0]) + results.pred_boxes = Boxes(box_pred) + results.scores = box_cls + results.pred_masks = mask_pred.squeeze(1) + results.pred_rec = out_rec + results = [results] + processed_results = [] + for results_per_image, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + + return processed_results + + @torch.no_grad() + def prepare_targets(self, targets): + new_targets = [] + for targets_per_image in targets: + target = {} + h, w = targets_per_image.image_size + image_size_xyxy = torch.as_tensor([w, h, w, h], dtype=torch.float, device=self.device) + gt_classes = targets_per_image.gt_classes + gt_boxes = targets_per_image.gt_boxes.tensor / image_size_xyxy + gt_boxes = box_xyxy_to_cxcywh(gt_boxes) + target["labels"] = gt_classes.to(self.device) + target["boxes"] = gt_boxes.to(self.device) + target["boxes_xyxy"] = targets_per_image.gt_boxes.tensor.to(self.device) + target["image_size_xyxy"] = image_size_xyxy.to(self.device) + image_size_xyxy_tgt = image_size_xyxy.unsqueeze(0).repeat(len(gt_boxes), 1) + target["image_size_xyxy_tgt"] = image_size_xyxy_tgt.to(self.device) + target["area"] = targets_per_image.gt_boxes.area().to(self.device) + + target["gt_masks"] = targets_per_image.gt_masks.to(self.device) + masks = target['gt_masks'].crop_and_resize(targets_per_image.gt_boxes, 28) + target["gt_masks"] = masks.float() + target["rec"] = targets_per_image.rec.to(self.device) + new_targets.append(target) + + return new_targets + + @torch.no_grad() + def inference(self, box_cls, box_pred, 
mask_pred, image_sizes, recred): + """ + Arguments: + box_cls (Tensor): tensor of shape (batch_size, num_proposals, K). + The tensor predicts the classification probability for each proposal. + box_pred (Tensor): tensors of shape (batch_size, num_proposals, 4). + The tensor predicts 4-vector (x,y,w,h) box + regression values for every proposal + image_sizes (List[torch.Size]): the input image sizes + + Returns: + results (List[Instances]): a list of #images elements. + """ + assert len(box_cls) == len(image_sizes) + results = [] + # + scores = torch.sigmoid(box_cls) + labels = torch.arange(self.num_classes, device=self.device).\ + unsqueeze(0).repeat(self.num_proposals, 1).flatten(0, 1) + for i, (scores_per_image, box_pred_per_image, mask_pred_per_image, image_size, rec_per_image) in enumerate(zip( + scores, box_pred, mask_pred, image_sizes, rec_pred + )): + result = Instances(image_size) + scores_per_image, topk_indices = scores_per_image.flatten(0, 1).topk(self.num_proposals, sorted=False) + labels_per_image = labels[topk_indices] + result.pred_boxes = Boxes(box_pred_per_image) + result.scores = scores_per_image + result.pred_classes = labels_per_image + result.pred_masks = mask_pred_per_image + result.pred_rec = rec_per_image + results.append(result) + return results + + def preprocess_image(self, batched_inputs): + """ + Normalize, pad and batch the input images. + """ + images = [self.normalizer(x["image"].to(self.device)) for x in batched_inputs] + images = ImageList.from_tensors(images, self.size_divisibility) + + images_whwh = list() + for bi in batched_inputs: + h, w = bi["image"].shape[-2:] + images_whwh.append(torch.tensor([w, h, w, h], dtype=torch.float32, device=self.device)) + images_whwh = torch.stack(images_whwh) + + return images, images_whwh diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/topk.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/topk.py new file mode 100644 index 0000000000000000000000000000000000000000..9b8ba74ca8bd7e2303f41ba22e3020cc50957755 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/topk.py @@ -0,0 +1,23 @@ +import heapq + +class TopK(object): + def __init__(self, k): + self.k = k + self.data = [] + + def reset(self): + self.data = [] + + def size(self): + return len(self.data) + + def push(self, x): + if len(self.data) < self.k: + heapq.heappush(self.data, x) + else: + heapq.heappushpop(self.data, x) + + def extract(self, sort=False): + if sort: + self.data.sort(reverse=True) + return self.data \ No newline at end of file diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/transformer.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..6dde312185c7c68f54562885f23ea3b0670e6c40 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/SWINTS/transformer.py @@ -0,0 +1,901 @@ +# pytorch 1.5.0 +import copy +import math +import warnings +from typing import Optional + +import torch +import torch.nn as nn +from torch import Tensor +from torch.nn import Dropout, LayerNorm, Linear, Module, ModuleList, Parameter +from torch.nn import functional as F +from torch.nn.init import constant_, xavier_uniform_ + + +def multi_head_attention_forward(query, # type: Tensor + key, # type: Tensor + value, # type: Tensor + embed_dim_to_check, # type: int + num_heads, # type: int + in_proj_weight, # type: Tensor + 
in_proj_bias, # type: Tensor + bias_k, # type: Optional[Tensor] + bias_v, # type: Optional[Tensor] + add_zero_attn, # type: bool + dropout_p, # type: float + out_proj_weight, # type: Tensor + out_proj_bias, # type: Tensor + training=True, # type: bool + key_padding_mask=None, # type: Optional[Tensor] + need_weights=True, # type: bool + attn_mask=None, # type: Optional[Tensor] + use_separate_proj_weight=False, # type: bool + q_proj_weight=None, # type: Optional[Tensor] + k_proj_weight=None, # type: Optional[Tensor] + v_proj_weight=None, # type: Optional[Tensor] + static_k=None, # type: Optional[Tensor] + static_v=None # type: Optional[Tensor] + ): + # type: (...) -> Tuple[Tensor, Optional[Tensor]] + r""" + Args: + query, key, value: map a query and a set of key-value pairs to an output. + See "Attention Is All You Need" for more details. + embed_dim_to_check: total dimension of the model. + num_heads: parallel attention heads. + in_proj_weight, in_proj_bias: input projection weight and bias. + bias_k, bias_v: bias of the key and value sequences to be added at dim=0. + add_zero_attn: add a new batch of zeros to the key and + value sequences at dim=1. + dropout_p: probability of an element to be zeroed. + out_proj_weight, out_proj_bias: the output projection weight and bias. + training: apply dropout if is ``True``. + key_padding_mask: if provided, specified padding elements in the key will + be ignored by the attention. This is an binary mask. When the value is True, + the corresponding value on the attention layer will be filled with -inf. + need_weights: output attn_output_weights. + attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all + the batches while a 3D mask allows to specify a different mask for the entries of each batch. + use_separate_proj_weight: the function accept the proj. weights for query, key, + and value in different forms. If false, in_proj_weight will be used, which is + a combination of q_proj_weight, k_proj_weight, v_proj_weight. + q_proj_weight, k_proj_weight, v_proj_weight, in_proj_bias: input projection weight and bias. + static_k, static_v: static key and value used for attention operators. + Shape: + Inputs: + - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is + the embedding dimension. + - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length. + If a ByteTensor is provided, the non-zero positions will be ignored while the zero positions + will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. + - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length. + 3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length, + S is the source sequence length. attn_mask ensures that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True`` + are not allowed to attend while ``False`` values will be unchanged. 
If a FloatTensor + is provided, it will be added to the attention weight. + - static_k: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length, + N is the batch size, E is the embedding dimension. E/num_heads is the head dimension. + - static_v: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length, + N is the batch size, E is the embedding dimension. E/num_heads is the head dimension. + Outputs: + - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, + E is the embedding dimension. + - attn_output_weights: :math:`(N, L, S)` where N is the batch size, + L is the target sequence length, S is the source sequence length. + """ + # if not torch.jit.is_scripting(): + # tens_ops = (query, key, value, in_proj_weight, in_proj_bias, bias_k, bias_v, + # out_proj_weight, out_proj_bias) + # if any([type(t) is not Tensor for t in tens_ops]) and has_torch_function(tens_ops): + # return handle_torch_function( + # multi_head_attention_forward, tens_ops, query, key, value, + # embed_dim_to_check, num_heads, in_proj_weight, in_proj_bias, + # bias_k, bias_v, add_zero_attn, dropout_p, out_proj_weight, + # out_proj_bias, training=training, key_padding_mask=key_padding_mask, + # need_weights=need_weights, attn_mask=attn_mask, + # use_separate_proj_weight=use_separate_proj_weight, + # q_proj_weight=q_proj_weight, k_proj_weight=k_proj_weight, + # v_proj_weight=v_proj_weight, static_k=static_k, static_v=static_v) + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == embed_dim_to_check + assert key.size() == value.size() + + head_dim = embed_dim // num_heads + assert head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + scaling = float(head_dim) ** -0.5 + + if not use_separate_proj_weight: + if torch.equal(query, key) and torch.equal(key, value): + # self-attention + q, k, v = F.linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1) + + elif torch.equal(key, value): + # encoder-decoder attention + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = F.linear(query, _w, _b) + + if key is None: + assert value is None + k = None + v = None + else: + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + k, v = F.linear(key, _w, _b).chunk(2, dim=-1) + + else: + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = F.linear(query, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = embed_dim * 2 + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + k = F.linear(key, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim * 2 + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + v = F.linear(value, _w, _b) + else: + q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight) + len1, len2 = q_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == query.size(-1) + + k_proj_weight_non_opt = 
torch.jit._unwrap_optional(k_proj_weight) + len1, len2 = k_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == key.size(-1) + + v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight) + len1, len2 = v_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == value.size(-1) + + if in_proj_bias is not None: + q = F.linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim]) + k = F.linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)]) + v = F.linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):]) + else: + q = F.linear(query, q_proj_weight_non_opt, in_proj_bias) + k = F.linear(key, k_proj_weight_non_opt, in_proj_bias) + v = F.linear(value, v_proj_weight_non_opt, in_proj_bias) + q = q * scaling + + if attn_mask is not None: + assert attn_mask.dtype == torch.float32 or attn_mask.dtype == torch.float64 or \ + attn_mask.dtype == torch.float16 or attn_mask.dtype == torch.uint8 or attn_mask.dtype == torch.bool, \ + 'Only float, byte, and bool types are supported for attn_mask, not {}'.format(attn_mask.dtype) + if attn_mask.dtype == torch.uint8: + warnings.warn("Byte tensor for attn_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.") + attn_mask = attn_mask.to(torch.bool) + + if attn_mask.dim() == 2: + attn_mask = attn_mask.unsqueeze(0) + if list(attn_mask.size()) != [1, query.size(0), key.size(0)]: + raise RuntimeError('The size of the 2D attn_mask is not correct.') + elif attn_mask.dim() == 3: + if list(attn_mask.size()) != [bsz * num_heads, query.size(0), key.size(0)]: + raise RuntimeError('The size of the 3D attn_mask is not correct.') + else: + raise RuntimeError("attn_mask's dimension {} is not supported".format(attn_mask.dim())) + # attn_mask's dim is 3 now. + + # # convert ByteTensor key_padding_mask to bool + # if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8: + # warnings.warn("Byte tensor for key_padding_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.") + # key_padding_mask = key_padding_mask.to(torch.bool) + + if bias_k is not None and bias_v is not None: + if static_k is None and static_v is None: + k = torch.cat([k, bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = pad(attn_mask, (0, 1)) + if key_padding_mask is not None: + key_padding_mask = pad(key_padding_mask, (0, 1)) + else: + assert static_k is None, "bias cannot be added to static key." + assert static_v is None, "bias cannot be added to static value." 
+ else: + assert bias_k is None + assert bias_v is None + + q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1) + if k is not None: + k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + + if static_k is not None: + assert static_k.size(0) == bsz * num_heads + assert static_k.size(2) == head_dim + k = static_k + + if static_v is not None: + assert static_v.size(0) == bsz * num_heads + assert static_v.size(2) == head_dim + v = static_v + + src_len = k.size(1) + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if add_zero_attn: + src_len += 1 + k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1) + v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1) + if attn_mask is not None: + attn_mask = pad(attn_mask, (0, 1)) + if key_padding_mask is not None: + key_padding_mask = pad(key_padding_mask, (0, 1)) + + attn_output_weights = torch.bmm(q, k.transpose(1, 2)) + assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len] + + if attn_mask is not None: + if attn_mask.dtype == torch.bool: + attn_output_weights.masked_fill_(attn_mask, float('-inf')) + else: + attn_output_weights += attn_mask + + + if key_padding_mask is not None: + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) + attn_output_weights = attn_output_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float('-inf'), + ) + attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len) + + attn_output_weights = F.softmax( + attn_output_weights, dim=-1) + attn_output_weights = F.dropout(attn_output_weights, p=dropout_p, training=training) + + attn_output = torch.bmm(attn_output_weights, v) + assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim] + attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias) + + if need_weights: + # average attention weights over heads + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) + return attn_output, attn_output_weights.sum(dim=1) / num_heads + else: + return attn_output, None + +class MultiheadAttention(Module): + r"""Allows the model to jointly attend to information + from different representation subspaces. + See reference: Attention Is All You Need + .. math:: + \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O + \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V) + Args: + embed_dim: total dimension of the model. + num_heads: parallel attention heads. + dropout: a Dropout layer on attn_output_weights. Default: 0.0. + bias: add bias as module parameter. Default: True. + add_bias_kv: add bias to the key and value sequences at dim=0. + add_zero_attn: add a new batch of zeros to the key and + value sequences at dim=1. + kdim: total number of features in key. Default: None. + vdim: total number of features in value. Default: None. + Note: if kdim and vdim are None, they will be set to embed_dim such that + query, key, and value have the same number of features. 
+ Examples:: + >>> multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) + >>> attn_output, attn_output_weights = multihead_attn(query, key, value) + """ + # __annotations__ = { + # 'bias_k': torch._jit_internal.Optional[torch.Tensor], + # 'bias_v': torch._jit_internal.Optional[torch.Tensor], + # } + __constants__ = ['q_proj_weight', 'k_proj_weight', 'v_proj_weight', 'in_proj_weight'] + + def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None): + super(MultiheadAttention, self).__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" + + if self._qkv_same_embed_dim is False: + self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) + self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim)) + self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim)) + self.register_parameter('in_proj_weight', None) + else: + self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim)) + self.register_parameter('q_proj_weight', None) + self.register_parameter('k_proj_weight', None) + self.register_parameter('v_proj_weight', None) + + if bias: + self.in_proj_bias = Parameter(torch.empty(3 * embed_dim)) + else: + self.register_parameter('in_proj_bias', None) + self.out_proj = Linear(embed_dim, embed_dim, bias=bias) + + if add_bias_kv: + self.bias_k = Parameter(torch.empty(1, 1, embed_dim)) + self.bias_v = Parameter(torch.empty(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + self._reset_parameters() + + def _reset_parameters(self): + if self._qkv_same_embed_dim: + xavier_uniform_(self.in_proj_weight) + else: + xavier_uniform_(self.q_proj_weight) + xavier_uniform_(self.k_proj_weight) + xavier_uniform_(self.v_proj_weight) + + if self.in_proj_bias is not None: + constant_(self.in_proj_bias, 0.) + constant_(self.out_proj.bias, 0.) + if self.bias_k is not None: + xavier_normal_(self.bias_k) + if self.bias_v is not None: + xavier_normal_(self.bias_v) + + def __setstate__(self, state): + # Support loading old MultiheadAttention checkpoints generated by v1.1.0 + if '_qkv_same_embed_dim' not in state: + state['_qkv_same_embed_dim'] = True + + super(MultiheadAttention, self).__setstate__(state) + + def forward(self, query, key, value, key_padding_mask=None, + need_weights=True, attn_mask=None): + # type: (Tensor, Tensor, Tensor, Optional[Tensor], bool, Optional[Tensor]) -> Tuple[Tensor, Optional[Tensor]] + r""" + Args: + query, key, value: map a query and a set of key-value pairs to an output. + See "Attention Is All You Need" for more details. + key_padding_mask: if provided, specified padding elements in the key will + be ignored by the attention. This is an binary mask. When the value is True, + the corresponding value on the attention layer will be filled with -inf. + need_weights: output attn_output_weights. + attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all + the batches while a 3D mask allows to specify a different mask for the entries of each batch. 
+ Shape: + - Inputs: + - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is + the embedding dimension. + - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length. + If a ByteTensor is provided, the non-zero positions will be ignored while the position + with the zero positions will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. + - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length. + 3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length, + S is the source sequence length. attn_mask ensure that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True`` + is not allowed to attend while ``False`` values will be unchanged. If a FloatTensor + is provided, it will be added to the attention weight. + - Outputs: + - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, + E is the embedding dimension. + - attn_output_weights: :math:`(N, L, S)` where N is the batch size, + L is the target sequence length, S is the source sequence length. + """ + if not self._qkv_same_embed_dim: + return multi_head_attention_forward( + query, key, value, self.embed_dim, self.num_heads, + self.in_proj_weight, self.in_proj_bias, + self.bias_k, self.bias_v, self.add_zero_attn, + self.dropout, self.out_proj.weight, self.out_proj.bias, + training=self.training, + key_padding_mask=key_padding_mask, need_weights=need_weights, + attn_mask=attn_mask, use_separate_proj_weight=True, + q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight, + v_proj_weight=self.v_proj_weight) + else: + return multi_head_attention_forward( + query, key, value, self.embed_dim, self.num_heads, + self.in_proj_weight, self.in_proj_bias, + self.bias_k, self.bias_v, self.add_zero_attn, + self.dropout, self.out_proj.weight, self.out_proj.bias, + training=self.training, + key_padding_mask=key_padding_mask, need_weights=need_weights, + attn_mask=attn_mask) + + +class Transformer(Module): + r"""A transformer model. User is able to modify the attributes as needed. The architecture + is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer, + Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and + Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information + Processing Systems, pages 6000-6010. Users can build the BERT(https://arxiv.org/abs/1810.04805) + model with corresponding parameters. + + Args: + d_model: the number of expected features in the encoder/decoder inputs (default=512). + nhead: the number of heads in the multiheadattention models (default=8). + num_encoder_layers: the number of sub-encoder-layers in the encoder (default=6). + num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6). + dim_feedforward: the dimension of the feedforward network model (default=2048). 
+ dropout: the dropout value (default=0.1). + activation: the activation function of encoder/decoder intermediate layer, relu or gelu (default=relu). + custom_encoder: custom encoder (default=None). + custom_decoder: custom decoder (default=None). + + Examples:: + >>> transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12) + >>> src = torch.rand((10, 32, 512)) + >>> tgt = torch.rand((20, 32, 512)) + >>> out = transformer_model(src, tgt) + + Note: A full example to apply nn.Transformer module for the word language model is available in + https://github.com/pytorch/examples/tree/master/word_language_model + """ + + def __init__(self, d_model=512, nhead=8, num_encoder_layers=6, + num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, + activation="relu", custom_encoder=None, custom_decoder=None): + super(Transformer, self).__init__() + + if custom_encoder is not None: + self.encoder = custom_encoder + else: + encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation) + encoder_norm = LayerNorm(d_model) + self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) + + if custom_decoder is not None: + self.decoder = custom_decoder + else: + decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation) + decoder_norm = LayerNorm(d_model) + self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm) + + self._reset_parameters() + + self.d_model = d_model + self.nhead = nhead + + def forward(self, src, tgt, src_mask=None, tgt_mask=None, + memory_mask=None, src_key_padding_mask=None, + tgt_key_padding_mask=None, memory_key_padding_mask=None): + # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor # noqa + r"""Take in and process masked source/target sequences. + + Args: + src: the sequence to the encoder (required). + tgt: the sequence to the decoder (required). + src_mask: the additive mask for the src sequence (optional). + tgt_mask: the additive mask for the tgt sequence (optional). + memory_mask: the additive mask for the encoder output (optional). + src_key_padding_mask: the ByteTensor mask for src keys per batch (optional). + tgt_key_padding_mask: the ByteTensor mask for tgt keys per batch (optional). + memory_key_padding_mask: the ByteTensor mask for memory keys per batch (optional). + + Shape: + - src: :math:`(S, N, E)`. + - tgt: :math:`(T, N, E)`. + - src_mask: :math:`(S, S)`. + - tgt_mask: :math:`(T, T)`. + - memory_mask: :math:`(T, S)`. + - src_key_padding_mask: :math:`(N, S)`. + - tgt_key_padding_mask: :math:`(N, T)`. + - memory_key_padding_mask: :math:`(N, S)`. + + Note: [src/tgt/memory]_mask ensures that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True`` + are not allowed to attend while ``False`` values will be unchanged. If a FloatTensor + is provided, it will be added to the attention weight. + [src/tgt/memory]_key_padding_mask provides specified elements in the key to be ignored by + the attention. If a ByteTensor is provided, the non-zero positions will be ignored while the zero + positions will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. 
+ + - output: :math:`(T, N, E)`. + + Note: Due to the multi-head attention architecture in the transformer model, + the output sequence length of a transformer is same as the input sequence + (i.e. target) length of the decode. + + where S is the source sequence length, T is the target sequence length, N is the + batch size, E is the feature number + + Examples: + >>> output = transformer_model(src, tgt, src_mask=src_mask, tgt_mask=tgt_mask) + """ + + if src.size(1) != tgt.size(1): + raise RuntimeError("the batch number of src and tgt must be equal") + + if src.size(2) != self.d_model or tgt.size(2) != self.d_model: + raise RuntimeError("the feature number of src and tgt must be equal to d_model") + + memory = self.encoder(src, mask=src_mask, src_key_padding_mask=src_key_padding_mask) + output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask, + tgt_key_padding_mask=tgt_key_padding_mask, + memory_key_padding_mask=memory_key_padding_mask) + return output + + def generate_square_subsequent_mask(self, sz): + r"""Generate a square mask for the sequence. The masked positions are filled with float('-inf'). + Unmasked positions are filled with float(0.0). + """ + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + return mask + + def _reset_parameters(self): + r"""Initiate parameters in the transformer model.""" + + for p in self.parameters(): + if p.dim() > 1: + xavier_uniform_(p) + + +class TransformerEncoder(Module): + r"""TransformerEncoder is a stack of N encoder layers + + Args: + encoder_layer: an instance of the TransformerEncoderLayer() class (required). + num_layers: the number of sub-encoder-layers in the encoder (required). + norm: the layer normalization component (optional). + + Examples:: + >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8) + >>> transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6) + >>> src = torch.rand(10, 32, 512) + >>> out = transformer_encoder(src) + """ + __constants__ = ['norm'] + + def __init__(self, encoder_layer, num_layers, norm=None): + super(TransformerEncoder, self).__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + + def forward(self, src, mask=None, src_key_padding_mask=None): + # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the input through the encoder layers in turn. + + Args: + src: the sequence to the encoder (required). + mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + output = src + + for i, mod in enumerate(self.layers): + output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask) + + if self.norm is not None: + output = self.norm(output) + + return output + + +class TransformerDecoder(Module): + r"""TransformerDecoder is a stack of N decoder layers + + Args: + decoder_layer: an instance of the TransformerDecoderLayer() class (required). + num_layers: the number of sub-decoder-layers in the decoder (required). + norm: the layer normalization component (optional). 
+ + Examples:: + >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8) + >>> transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6) + >>> memory = torch.rand(10, 32, 512) + >>> tgt = torch.rand(20, 32, 512) + >>> out = transformer_decoder(tgt, memory) + """ + __constants__ = ['norm'] + + def __init__(self, decoder_layer, num_layers, norm=None): + super(TransformerDecoder, self).__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + + def forward(self, tgt, memory, memory2=None, tgt_mask=None, + memory_mask=None, memory_mask2=None, tgt_key_padding_mask=None, + memory_key_padding_mask=None, memory_key_padding_mask2=None): + # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the inputs (and mask) through the decoder layer in turn. + + Args: + tgt: the sequence to the decoder (required). + memory: the sequence from the last layer of the encoder (required). + tgt_mask: the mask for the tgt sequence (optional). + memory_mask: the mask for the memory sequence (optional). + tgt_key_padding_mask: the mask for the tgt keys per batch (optional). + memory_key_padding_mask: the mask for the memory keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + output = tgt + + for mod in self.layers: + output = mod(output, memory, memory2=memory2, tgt_mask=tgt_mask, + memory_mask=memory_mask, memory_mask2=memory_mask2, + tgt_key_padding_mask=tgt_key_padding_mask, + memory_key_padding_mask=memory_key_padding_mask, + memory_key_padding_mask2=memory_key_padding_mask2) + + if self.norm is not None: + output = self.norm(output) + + return output + +class TransformerEncoderLayer(Module): + r"""TransformerEncoderLayer is made up of self-attn and feedforward network. + This standard encoder layer is based on the paper "Attention Is All You Need". + Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, + Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in + Neural Information Processing Systems, pages 6000-6010. Users may modify or implement + in a different way during application. + + Args: + d_model: the number of expected features in the input (required). + nhead: the number of heads in the multiheadattention models (required). + dim_feedforward: the dimension of the feedforward network model (default=2048). + dropout: the dropout value (default=0.1). + activation: the activation function of intermediate layer, relu or gelu (default=relu). 
+ + Examples:: + >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8) + >>> src = torch.rand(10, 32, 512) + >>> out = encoder_layer(src) + """ + + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, + activation="relu", debug=False): + super(TransformerEncoderLayer, self).__init__() + self.debug = debug + self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + self.linear1 = Linear(d_model, dim_feedforward) + self.dropout = Dropout(dropout) + self.linear2 = Linear(dim_feedforward, d_model) + + self.norm1 = LayerNorm(d_model) + self.norm2 = LayerNorm(d_model) + self.dropout1 = Dropout(dropout) + self.dropout2 = Dropout(dropout) + + self.activation = _get_activation_fn(activation) + + def __setstate__(self, state): + if 'activation' not in state: + state['activation'] = F.relu + super(TransformerEncoderLayer, self).__setstate__(state) + + def forward(self, src, src_mask=None, src_key_padding_mask=None): + # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the input through the encoder layer. + + Args: + src: the sequence to the encoder layer (required). + src_mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + src2, attn = self.self_attn(src, src, src, attn_mask=src_mask, + key_padding_mask=src_key_padding_mask) + if self.debug: self.attn = attn + src = src + self.dropout1(src2) + src = self.norm1(src) + src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) + src = src + self.dropout2(src2) + src = self.norm2(src) + + return src + + +class TransformerDecoderLayer(Module): + r"""TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network. + This standard decoder layer is based on the paper "Attention Is All You Need". + Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, + Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in + Neural Information Processing Systems, pages 6000-6010. Users may modify or implement + in a different way during application. + + Args: + d_model: the number of expected features in the input (required). + nhead: the number of heads in the multiheadattention models (required). + dim_feedforward: the dimension of the feedforward network model (default=2048). + dropout: the dropout value (default=0.1). + activation: the activation function of intermediate layer, relu or gelu (default=relu). 
+ + Examples:: + >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8) + >>> memory = torch.rand(10, 32, 512) + >>> tgt = torch.rand(20, 32, 512) + >>> out = decoder_layer(tgt, memory) + """ + + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, + activation="relu", self_attn=True, siamese=False, debug=False): + super(TransformerDecoderLayer, self).__init__() + self.has_self_attn, self.siamese = self_attn, siamese + self.debug = debug + if self.has_self_attn: + self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout) + self.norm1 = LayerNorm(d_model) + self.dropout1 = Dropout(dropout) + self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + self.linear1 = Linear(d_model, dim_feedforward) + self.dropout = Dropout(dropout) + self.linear2 = Linear(dim_feedforward, d_model) + + self.norm2 = LayerNorm(d_model) + self.norm3 = LayerNorm(d_model) + self.dropout2 = Dropout(dropout) + self.dropout3 = Dropout(dropout) + if self.siamese: + self.multihead_attn2 = MultiheadAttention(d_model, nhead, dropout=dropout) + + self.activation = _get_activation_fn(activation) + + def __setstate__(self, state): + if 'activation' not in state: + state['activation'] = F.relu + super(TransformerDecoderLayer, self).__setstate__(state) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, + tgt_key_padding_mask=None, memory_key_padding_mask=None, + memory2=None, memory_mask2=None, memory_key_padding_mask2=None): + # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the inputs (and mask) through the decoder layer. + + Args: + tgt: the sequence to the decoder layer (required). + memory: the sequence from the last layer of the encoder (required). + tgt_mask: the mask for the tgt sequence (optional). + memory_mask: the mask for the memory sequence (optional). + tgt_key_padding_mask: the mask for the tgt keys per batch (optional). + memory_key_padding_mask: the mask for the memory keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + if self.has_self_attn: + tgt2, attn = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask, + key_padding_mask=tgt_key_padding_mask) + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + if self.debug: self.attn = attn + tgt2, attn2 = self.multihead_attn(tgt, memory, memory, attn_mask=memory_mask, + key_padding_mask=memory_key_padding_mask) + if self.debug: self.attn2 = attn2 + + if self.siamese: + tgt3, attn3 = self.multihead_attn2(tgt, memory2, memory2, attn_mask=memory_mask2, + key_padding_mask=memory_key_padding_mask2) + tgt = tgt + self.dropout2(tgt3) + if self.debug: self.attn3 = attn3 + + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout3(tgt2) + tgt = self.norm3(tgt) + + return tgt + + +def _get_clones(module, N): + return ModuleList([copy.deepcopy(module) for i in range(N)]) + + +def _get_activation_fn(activation): + if activation == "relu": + return F.relu + elif activation == "gelu": + return F.gelu + + raise RuntimeError("activation should be relu/gelu, not {}".format(activation)) + + +class PositionalEncoding(nn.Module): + r"""Inject some information about the relative or absolute position of the tokens + in the sequence. The positional encodings have the same dimension as + the embeddings, so that the two can be summed. 
Here, we use sine and cosine + functions of different frequencies. + .. math:: + \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) + \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) + \text{where pos is the word position and i is the embed idx) + Args: + d_model: the embed dim (required). + dropout: the dropout value (default=0.1). + max_len: the max. length of the incoming sequence (default=5000). + Examples: + >>> pos_encoder = PositionalEncoding(d_model) + """ + + def __init__(self, d_model, dropout=0.1, max_len=5000): + super(PositionalEncoding, self).__init__() + self.dropout = nn.Dropout(p=dropout) + + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + r"""Inputs of forward function + Args: + x: the sequence fed to the positional encoder model (required). + Shape: + x: [sequence length, batch size, embed dim] + output: [sequence length, batch size, embed dim] + Examples: + >>> output = pos_encoder(x) + """ + + x = x + self.pe[:x.size(0), :] + return self.dropout(x) + + +if __name__ == '__main__': + transformer_model = Transformer(nhead=16, num_encoder_layers=12) + src = torch.rand((10, 32, 512)) + tgt = torch.rand((20, 32, 512)) + out = transformer_model(src, tgt) + print(out) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f691fb71c3d939059090f668c25737dd297f6160 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/projects/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import importlib +from pathlib import Path + +_PROJECTS = { + "point_rend": "PointRend", + "deeplab": "DeepLab", + "panoptic_deeplab": "Panoptic-DeepLab", +} +_PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects" + +if _PROJECT_ROOT.is_dir(): + # This is true only for in-place installation (pip install -e, setup.py develop), + # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 + + class _D2ProjectsFinder(importlib.abc.MetaPathFinder): + def find_spec(self, name, path, target=None): + if not name.startswith("detectron2.projects."): + return + project_name = name.split(".")[-1] + project_dir = _PROJECTS.get(project_name) + if not project_dir: + return + target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" + if not target_file.is_file(): + return + return importlib.util.spec_from_file_location(name, target_file) + + import sys + + sys.meta_path.append(_D2ProjectsFinder()) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a2dbd35bb24f0d4a979bc8f304142376d87e7ec --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
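+# Typical usage of this package (illustrative note, not part of the upstream file):
+#   optimizer = build_optimizer(cfg, model)
+#   scheduler = build_lr_scheduler(cfg, optimizer)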
+from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params +from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR, LRMultiplier, WarmupParamScheduler + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/build.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/build.py new file mode 100644 index 0000000000000000000000000000000000000000..d8d1c0316ec703d4c9ca322a1eb1741a4751c6d5 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/build.py @@ -0,0 +1,252 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import logging +from enum import Enum +from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Type, Union +import torch +from fvcore.common.param_scheduler import CosineParamScheduler, MultiStepParamScheduler + +from detectron2.config import CfgNode + +from .lr_scheduler import LRMultiplier, WarmupParamScheduler + +_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]] +_GradientClipper = Callable[[_GradientClipperInput], None] + + +class GradientClipType(Enum): + VALUE = "value" + NORM = "norm" + + +def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper: + """ + Creates gradient clipping closure to clip by value or by norm, + according to the provided config. + """ + cfg = copy.deepcopy(cfg) + + def clip_grad_norm(p: _GradientClipperInput): + torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE) + + def clip_grad_value(p: _GradientClipperInput): + torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE) + + _GRADIENT_CLIP_TYPE_TO_CLIPPER = { + GradientClipType.VALUE: clip_grad_value, + GradientClipType.NORM: clip_grad_norm, + } + return _GRADIENT_CLIP_TYPE_TO_CLIPPER[GradientClipType(cfg.CLIP_TYPE)] + + +def _generate_optimizer_class_with_gradient_clipping( + optimizer: Type[torch.optim.Optimizer], + *, + per_param_clipper: Optional[_GradientClipper] = None, + global_clipper: Optional[_GradientClipper] = None, +) -> Type[torch.optim.Optimizer]: + """ + Dynamically creates a new type that inherits the type of a given instance + and overrides the `step` method to add gradient clipping + """ + assert ( + per_param_clipper is None or global_clipper is None + ), "Not allowed to use both per-parameter clipping and global clipping" + + def optimizer_wgc_step(self, closure=None): + if per_param_clipper is not None: + for group in self.param_groups: + for p in group["params"]: + per_param_clipper(p) + else: + # global clipper for future use with detr + # (https://github.com/facebookresearch/detr/pull/287) + all_params = itertools.chain(*[g["params"] for g in self.param_groups]) + global_clipper(all_params) + super(type(self), self).step(closure) + + OptimizerWithGradientClip = type( + optimizer.__name__ + "WithGradientClip", + (optimizer,), + {"step": optimizer_wgc_step}, + ) + return OptimizerWithGradientClip + + +def maybe_add_gradient_clipping( + cfg: CfgNode, optimizer: Type[torch.optim.Optimizer] +) -> Type[torch.optim.Optimizer]: + """ + If gradient clipping is enabled through config options, wraps the existing + optimizer type to become a new dynamically created class OptimizerWithGradientClip + that inherits the given optimizer and overrides the `step` method to + include gradient clipping. + + Args: + cfg: CfgNode, configuration options + optimizer: type. 
A subclass of torch.optim.Optimizer + + Return: + type: either the input `optimizer` (if gradient clipping is disabled), or + a subclass of it with gradient clipping included in the `step` method. + """ + if not cfg.SOLVER.CLIP_GRADIENTS.ENABLED: + return optimizer + if isinstance(optimizer, torch.optim.Optimizer): + optimizer_type = type(optimizer) + else: + assert issubclass(optimizer, torch.optim.Optimizer), optimizer + optimizer_type = optimizer + + grad_clipper = _create_gradient_clipper(cfg.SOLVER.CLIP_GRADIENTS) + OptimizerWithGradientClip = _generate_optimizer_class_with_gradient_clipping( + optimizer_type, per_param_clipper=grad_clipper + ) + if isinstance(optimizer, torch.optim.Optimizer): + optimizer.__class__ = OptimizerWithGradientClip # a bit hacky, not recommended + return optimizer + else: + return OptimizerWithGradientClip + + +def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: + """ + Build an optimizer from config. + """ + params = get_default_optimizer_params( + model, + base_lr=cfg.SOLVER.BASE_LR, + weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM, + bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR, + weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS, + ) + return maybe_add_gradient_clipping(cfg, torch.optim.SGD)( + params, + lr=cfg.SOLVER.BASE_LR, + momentum=cfg.SOLVER.MOMENTUM, + nesterov=cfg.SOLVER.NESTEROV, + weight_decay=cfg.SOLVER.WEIGHT_DECAY, + ) + + +def get_default_optimizer_params( + model: torch.nn.Module, + base_lr: Optional[float] = None, + weight_decay: Optional[float] = None, + weight_decay_norm: Optional[float] = None, + bias_lr_factor: Optional[float] = 1.0, + weight_decay_bias: Optional[float] = None, + overrides: Optional[Dict[str, Dict[str, float]]] = None, +): + """ + Get default param list for optimizer, with support for a few types of + overrides. If not overrides needed, this is equivalent to `model.parameters()`. + + Args: + base_lr: lr for every group by default. Can be omitted to use the one in optimizer. + weight_decay: weight decay for every group by default. Can be omitted to use the one + in optimizer. + weight_decay_norm: override weight decay for params in normalization layers + bias_lr_factor: multiplier of lr for bias parameters. + weight_decay_bias: override weight decay for bias parameters + overrides: if not `None`, provides values for optimizer hyperparameters + (LR, weight decay) for module parameters with a given name; e.g. + ``{"embedding": {"lr": 0.01, "weight_decay": 0.1}}`` will set the LR and + weight decay values for all module parameters named `embedding`. + + For common detection models, ``weight_decay_norm`` is the only option + needed to be set. ``bias_lr_factor,weight_decay_bias`` are legacy settings + from Detectron1 that are not found useful. + + Example: + :: + torch.optim.SGD(get_default_optimizer_params(model, weight_decay_norm=0), + lr=0.01, weight_decay=1e-4, momentum=0.9) + """ + if overrides is None: + overrides = {} + defaults = {} + if base_lr is not None: + defaults["lr"] = base_lr + if weight_decay is not None: + defaults["weight_decay"] = weight_decay + bias_overrides = {} + if bias_lr_factor is not None and bias_lr_factor != 1.0: + # NOTE: unlike Detectron v1, we now by default make bias hyperparameters + # exactly the same as regular weights. 
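+        # (added note) With bias_lr_factor != 1.0, every parameter literally named "bias"
+        # is routed through the "bias" entry of `overrides` below, so it ends up with
+        # lr = base_lr * bias_lr_factor (and weight_decay_bias, if given).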
+ if base_lr is None: + raise ValueError("bias_lr_factor requires base_lr") + bias_overrides["lr"] = base_lr * bias_lr_factor + if weight_decay_bias is not None: + bias_overrides["weight_decay"] = weight_decay_bias + if len(bias_overrides): + if "bias" in overrides: + raise ValueError("Conflicting overrides for 'bias'") + overrides["bias"] = bias_overrides + + norm_module_types = ( + torch.nn.BatchNorm1d, + torch.nn.BatchNorm2d, + torch.nn.BatchNorm3d, + torch.nn.SyncBatchNorm, + # NaiveSyncBatchNorm inherits from BatchNorm2d + torch.nn.GroupNorm, + torch.nn.InstanceNorm1d, + torch.nn.InstanceNorm2d, + torch.nn.InstanceNorm3d, + torch.nn.LayerNorm, + torch.nn.LocalResponseNorm, + ) + params: List[Dict[str, Any]] = [] + memo: Set[torch.nn.parameter.Parameter] = set() + for module in model.modules(): + for module_param_name, value in module.named_parameters(recurse=False): + if not value.requires_grad: + continue + # Avoid duplicating parameters + if value in memo: + continue + memo.add(value) + + hyperparams = copy.copy(defaults) + if isinstance(module, norm_module_types) and weight_decay_norm is not None: + hyperparams["weight_decay"] = weight_decay_norm + hyperparams.update(overrides.get(module_param_name, {})) + params.append({"params": [value], **hyperparams}) + return params + + +def build_lr_scheduler( + cfg: CfgNode, optimizer: torch.optim.Optimizer +) -> torch.optim.lr_scheduler._LRScheduler: + """ + Build a LR scheduler from config. + """ + name = cfg.SOLVER.LR_SCHEDULER_NAME + + if name == "WarmupMultiStepLR": + steps = [x for x in cfg.SOLVER.STEPS if x <= cfg.SOLVER.MAX_ITER] + if len(steps) != len(cfg.SOLVER.STEPS): + logger = logging.getLogger(__name__) + logger.warning( + "SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. " + "These values will be ignored." + ) + sched = MultiStepParamScheduler( + values=[cfg.SOLVER.GAMMA ** k for k in range(len(steps) + 1)], + milestones=steps, + num_updates=cfg.SOLVER.MAX_ITER, + ) + elif name == "WarmupCosineLR": + sched = CosineParamScheduler(1, 0) + else: + raise ValueError("Unknown LR scheduler: {}".format(name)) + + sched = WarmupParamScheduler( + sched, + cfg.SOLVER.WARMUP_FACTOR, + cfg.SOLVER.WARMUP_ITERS / cfg.SOLVER.MAX_ITER, + cfg.SOLVER.WARMUP_METHOD, + ) + return LRMultiplier(optimizer, multiplier=sched, max_iter=cfg.SOLVER.MAX_ITER) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/lr_scheduler.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..8803e87b9e60cffdbe048c97c282d353191ae4c8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/solver/lr_scheduler.py @@ -0,0 +1,238 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +from bisect import bisect_right +from typing import List +import torch +from fvcore.common.param_scheduler import ( + CompositeParamScheduler, + ConstantParamScheduler, + LinearParamScheduler, + ParamScheduler, +) + +logger = logging.getLogger(__name__) + + +class WarmupParamScheduler(CompositeParamScheduler): + """ + Add an initial warmup stage to another scheduler. + """ + + def __init__( + self, + scheduler: ParamScheduler, + warmup_factor: float, + warmup_length: float, + warmup_method: str = "linear", + ): + """ + Args: + scheduler: warmup will be added at the beginning of this scheduler + warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 
0.001 + warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire + training, e.g. 0.01 + warmup_method: one of "linear" or "constant" + """ + end_value = scheduler(warmup_length) # the value to reach when warmup ends + start_value = warmup_factor * scheduler(0.0) + if warmup_method == "constant": + warmup = ConstantParamScheduler(start_value) + elif warmup_method == "linear": + warmup = LinearParamScheduler(start_value, end_value) + else: + raise ValueError("Unknown warmup method: {}".format(warmup_method)) + super().__init__( + [warmup, scheduler], + interval_scaling=["rescaled", "fixed"], + lengths=[warmup_length, 1 - warmup_length], + ) + + +class LRMultiplier(torch.optim.lr_scheduler._LRScheduler): + """ + A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the + learning rate of each param in the optimizer. + Every step, the learning rate of each parameter becomes its initial value + multiplied by the output of the given :class:`ParamScheduler`. + + The absolute learning rate value of each parameter can be different. + This scheduler can be used as long as the relative scale among them do + not change during training. + + Examples: + :: + LRMultiplier( + opt, + WarmupParamScheduler( + MultiStepParamScheduler( + [1, 0.1, 0.01], + milestones=[60000, 80000], + num_updates=90000, + ), 0.001, 100 / 90000 + ), + max_iter=90000 + ) + """ + + # NOTES: in the most general case, every LR can use its own scheduler. + # Supporting this requires interaction with the optimizer when its parameter + # group is initialized. For example, classyvision implements its own optimizer + # that allows different schedulers for every parameter group. + # To avoid this complexity, we use this class to support the most common cases + # where the relative scale among all LRs stay unchanged during training. In this + # case we only need a total of one scheduler that defines the relative LR multiplier. + + def __init__( + self, + optimizer: torch.optim.Optimizer, + multiplier: ParamScheduler, + max_iter: int, + last_iter: int = -1, + ): + """ + Args: + optimizer, last_iter: See ``torch.optim.lr_scheduler._LRScheduler``. + ``last_iter`` is the same as ``last_epoch``. + multiplier: a fvcore ParamScheduler that defines the multiplier on + every LR of the optimizer + max_iter: the total number of training iterations + """ + if not isinstance(multiplier, ParamScheduler): + raise ValueError( + "_LRMultiplier(multiplier=) must be an instance of fvcore " + f"ParamScheduler. Got {multiplier} instead." + ) + self._multiplier = multiplier + self._max_iter = max_iter + super().__init__(optimizer, last_epoch=last_iter) + + def state_dict(self): + # fvcore schedulers are stateless. Only keep pytorch scheduler states + return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch} + + def get_lr(self) -> List[float]: + multiplier = self._multiplier(self.last_epoch / self._max_iter) + return [base_lr * multiplier for base_lr in self.base_lrs] + + +""" +Content below is no longer needed! +""" + +# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes +# only on epoch boundaries. We typically use iteration based schedules instead. +# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean +# "iteration" instead. + +# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating +# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 
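+# Illustrative sketch (not part of the upstream file): how the deprecated schedulers
+# below map onto the recommended LRMultiplier + fvcore ParamScheduler combination.
+# The milestone / iteration counts here are placeholder values.
+def _example_warmup_multistep_multiplier(optimizer: torch.optim.Optimizer) -> LRMultiplier:
+    from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+    # gamma = 0.1 applied at 60k and 80k iterations of a 90k-iteration schedule
+    multistep = MultiStepParamScheduler(
+        values=[1.0, 0.1, 0.01],
+        milestones=[60000, 80000],
+        num_updates=90000,
+    )
+    # linear warmup from 0.001x over the first 1000 iterations
+    warmed = WarmupParamScheduler(multistep, warmup_factor=0.001, warmup_length=1000 / 90000)
+    return LRMultiplier(optimizer, multiplier=warmed, max_iter=90000)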
+ + +class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + milestones: List[int], + gamma: float = 0.1, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + logger.warning( + "WarmupMultiStepLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" + ) + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of" " increasing integers. Got {}", milestones + ) + self.milestones = milestones + self.gamma = gamma + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + return [ + base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + max_iters: int, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + logger.warning( + "WarmupCosineLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" + ) + self.max_iters = max_iters + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + # Different definitions of half-cosine with warmup are possible. For + # simplicity we multiply the standard half-cosine schedule by the warmup + # factor. An alternative is to start the period of the cosine at warmup_iters + # instead of at 0. In the case that warmup_iters << max_iters the two are + # very close to each other. + return [ + base_lr + * warmup_factor + * 0.5 + * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +def _get_warmup_factor_at_iter( + method: str, iter: int, warmup_iters: int, warmup_factor: float +) -> float: + """ + Return the learning rate warmup factor at a specific iteration. + See :paper:`ImageNet in 1h` for more details. + + Args: + method (str): warmup method; either "constant" or "linear". + iter (int): iteration at which to calculate the warmup factor. + warmup_iters (int): the number of warmup iterations. + warmup_factor (float): the base warmup factor (the meaning changes according + to the method used). + + Returns: + float: the effective warmup factor at the given iteration. 
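+    Example (illustrative)::
+
+        # linear warmup, halfway through: 0.001 * (1 - 0.5) + 0.5 = 0.5005
+        _get_warmup_factor_at_iter("linear", iter=500, warmup_iters=1000, warmup_factor=0.001)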
+ """ + if iter >= warmup_iters: + return 1.0 + + if method == "constant": + return warmup_factor + elif method == "linear": + alpha = iter / warmup_iters + return warmup_factor * (1 - alpha) + alpha + else: + raise ValueError("Unknown warmup method: {}".format(method)) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..404af8815072f9d66da32c523718dd8b023b5a60 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa +from .image_list import ImageList + +from .instances import Instances +from .keypoints import Keypoints, heatmaps_to_keypoints +from .masks import BitMasks, PolygonMasks, polygons_to_bitmask +from .rotated_boxes import RotatedBoxes +from .rotated_boxes import pairwise_iou as pairwise_iou_rotated + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/boxes.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..6d8762d60a873c6b6daa42e9e7fcac41eda32fec --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/boxes.py @@ -0,0 +1,416 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import numpy as np +from enum import IntEnum, unique +from typing import List, Tuple, Union +import torch +from torch import device + +from detectron2.utils.env import TORCH_VERSION + +_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] + + +if TORCH_VERSION < (1, 8): + _maybe_jit_unused = torch.jit.unused +else: + + def _maybe_jit_unused(x): + return x + + +@unique +class BoxMode(IntEnum): + """ + Enum of different ways to represent a box. + """ + + XYXY_ABS = 0 + """ + (x0, y0, x1, y1) in absolute floating points coordinates. + The coordinates in range [0, width or height]. + """ + XYWH_ABS = 1 + """ + (x0, y0, w, h) in absolute floating points coordinates. + """ + XYXY_REL = 2 + """ + Not yet supported! + (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. + """ + XYWH_REL = 3 + """ + Not yet supported! + (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. + """ + XYWHA_ABS = 4 + """ + (xc, yc, w, h, a) in absolute floating points coordinates. + (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. + """ + + @staticmethod + def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: + """ + Args: + box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 + from_mode, to_mode (BoxMode) + + Returns: + The converted box of the same type. 
+ """ + if from_mode == to_mode: + return box + + original_type = type(box) + is_numpy = isinstance(box, np.ndarray) + single_box = isinstance(box, (list, tuple)) + if single_box: + assert len(box) == 4 or len(box) == 5, ( + "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," + " where k == 4 or 5" + ) + arr = torch.tensor(box)[None, :] + else: + # avoid modifying the input box + if is_numpy: + arr = torch.from_numpy(np.asarray(box)).clone() + else: + arr = box.clone() + + assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ], "Relative mode not yet supported!" + + if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: + assert ( + arr.shape[-1] == 5 + ), "The last dimension of input shape must be 5 for XYWHA format" + original_dtype = arr.dtype + arr = arr.double() + + w = arr[:, 2] + h = arr[:, 3] + a = arr[:, 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + new_w = c * w + s * h + new_h = c * h + s * w + + # convert center to top-left corner + arr[:, 0] -= new_w / 2.0 + arr[:, 1] -= new_h / 2.0 + # bottom-right corner + arr[:, 2] = arr[:, 0] + new_w + arr[:, 3] = arr[:, 1] + new_h + + arr = arr[:, :4].to(dtype=original_dtype) + elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: + original_dtype = arr.dtype + arr = arr.double() + arr[:, 0] += arr[:, 2] / 2.0 + arr[:, 1] += arr[:, 3] / 2.0 + angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) + arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) + else: + if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: + arr[:, 2] += arr[:, 0] + arr[:, 3] += arr[:, 1] + elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: + arr[:, 2] -= arr[:, 0] + arr[:, 3] -= arr[:, 1] + else: + raise NotImplementedError( + "Conversion from BoxMode {} to {} is not supported yet".format( + from_mode, to_mode + ) + ) + + if single_box: + return original_type(arr.flatten().tolist()) + if is_numpy: + return arr.numpy() + else: + return arr + + +class Boxes: + """ + This structure stores a list of boxes as a Nx4 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + + Attributes: + tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((-1, 4)).to(dtype=torch.float32, device=device) + assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size() + + self.tensor = tensor + + def clone(self) -> "Boxes": + """ + Clone the Boxes. + + Returns: + Boxes + """ + return Boxes(self.tensor.clone()) + + @_maybe_jit_unused + def to(self, device: torch.device): + # Boxes are assumed float32 and does not support to(dtype) + return Boxes(self.tensor.to(device=device)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. 
+ + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) + return area + + def clip(self, box_size: Tuple[int, int]) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + Args: + box_size (height, width): The clipping box's size. + """ + assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" + h, w = box_size + x1 = self.tensor[:, 0].clamp(min=0, max=w) + y1 = self.tensor[:, 1].clamp(min=0, max=h) + x2 = self.tensor[:, 2].clamp(min=0, max=w) + y2 = self.tensor[:, 3].clamp(min=0, max=h) + self.tensor = torch.stack((x1, y1, x2, y2), dim=-1) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: + a binary vector which represents whether each box is empty + (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] - box[:, 0] + heights = box[:, 3] - box[:, 1] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item) -> "Boxes": + """ + Args: + item: int, slice, or a BoolTensor + + Returns: + Boxes: Create a new :class:`Boxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned Boxes might share storage with this Boxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Boxes(self.tensor[item].view(1, -1)) + b = self.tensor[item] + assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) + return Boxes(b) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "Boxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box. + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + Returns: + a binary vector, indicating whether each box is inside the reference box. + """ + height, width = box_size + inds_inside = ( + (self.tensor[..., 0] >= -boundary_threshold) + & (self.tensor[..., 1] >= -boundary_threshold) + & (self.tensor[..., 2] < width + boundary_threshold) + & (self.tensor[..., 3] < height + boundary_threshold) + ) + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). 
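# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the clamp-based
# clipping performed by Boxes.clip above, applied to one out-of-bounds box
# for an image of size (height=100, width=200). Numbers are made up.
import torch

boxes = torch.tensor([[-5.0, 10.0, 250.0, 90.0]])        # (x1, y1, x2, y2)
h, w = 100, 200
x1 = boxes[:, 0].clamp(min=0, max=w)
y1 = boxes[:, 1].clamp(min=0, max=h)
x2 = boxes[:, 2].clamp(min=0, max=w)
y2 = boxes[:, 3].clamp(min=0, max=h)
print(torch.stack((x1, y1, x2, y2), dim=-1))              # [[0., 10., 200., 90.]]
# ---------------------------------------------------------------------------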
+ """ + return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2 + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the box with horizontal and vertical scaling factors + """ + self.tensor[:, 0::2] *= scale_x + self.tensor[:, 1::2] *= scale_y + + @classmethod + @_maybe_jit_unused + def cat(cls, boxes_list: List["Boxes"]) -> "Boxes": + """ + Concatenates a list of Boxes into a single Boxes + + Arguments: + boxes_list (list[Boxes]) + + Returns: + Boxes: the concatenated Boxes + """ + assert isinstance(boxes_list, (list, tuple)) + if len(boxes_list) == 0: + return cls(torch.empty(0)) + assert all([isinstance(box, Boxes) for box in boxes_list]) + + # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input + cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) + return cat_boxes + + @property + def device(self) -> device: + return self.tensor.device + + # type "Iterator[torch.Tensor]", yield, and iter() not supported by torchscript + # https://github.com/pytorch/pytorch/issues/18627 + @torch.jit.unused + def __iter__(self): + """ + Yield a box as a Tensor of shape (4,) at a time. + """ + yield from self.tensor + + +def pairwise_intersection(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, + compute the intersection area between __all__ N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax) + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: intersection, sized [N,M]. + """ + boxes1, boxes2 = boxes1.tensor, boxes2.tensor + width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( + boxes1[:, None, :2], boxes2[:, :2] + ) # [N,M,2] + + width_height.clamp_(min=0) # [N,M,2] + intersection = width_height.prod(dim=2) # [N,M] + return intersection + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, compute the IoU + (intersection over union) between **all** N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax). + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + area1 = boxes1.area() # [N] + area2 = boxes2.area() # [M] + inter = pairwise_intersection(boxes1, boxes2) + + # handle empty boxes + iou = torch.where( + inter > 0, + inter / (area1[:, None] + area2 - inter), + torch.zeros(1, dtype=inter.dtype, device=inter.device), + ) + return iou + + +def pairwise_ioa(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Similar to :func:`pariwise_iou` but compute the IoA (intersection over boxes2 area). + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: IoA, sized [N,M]. + """ + area2 = boxes2.area() # [M] + inter = pairwise_intersection(boxes1, boxes2) + + # handle empty boxes + ioa = torch.where( + inter > 0, inter / area2, torch.zeros(1, dtype=inter.dtype, device=inter.device) + ) + return ioa + + +def matched_boxlist_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Compute pairwise intersection over union (IOU) of two sets of matched + boxes. The box order must be (xmin, ymin, xmax, ymax). + Similar to boxlist_iou, but computes only diagonal elements of the matrix + + Args: + boxes1: (Boxes) bounding boxes, sized [N,4]. 
+ boxes2: (Boxes) bounding boxes, sized [N,4]. + Returns: + Tensor: iou, sized [N]. + """ + assert len(boxes1) == len( + boxes2 + ), "boxlists should have the same" "number of entries, got {}, {}".format( + len(boxes1), len(boxes2) + ) + area1 = boxes1.area() # [N] + area2 = boxes2.area() # [N] + box1, box2 = boxes1.tensor, boxes2.tensor + lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2] + rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2] + wh = (rb - lt).clamp(min=0) # [N,2] + inter = wh[:, 0] * wh[:, 1] # [N] + iou = inter / (area1 + area2 - inter) # [N] + return iou diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/image_list.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/image_list.py new file mode 100644 index 0000000000000000000000000000000000000000..26e6e49c55e27120ab26b6107cebb6c885f81c38 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/image_list.py @@ -0,0 +1,124 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import division +from typing import Any, List, Tuple +import torch +from torch import device +from torch.nn import functional as F + +from detectron2.utils.env import TORCH_VERSION + + +def _as_tensor(x: Tuple[int, int]) -> torch.Tensor: + """ + An equivalent of `torch.as_tensor`, but works under tracing if input + is a list of tensor. `torch.as_tensor` will record a constant in tracing, + but this function will use `torch.stack` instead. + """ + if torch.jit.is_scripting(): + return torch.as_tensor(x) + if isinstance(x, (list, tuple)) and all([isinstance(t, torch.Tensor) for t in x]): + return torch.stack(x) + return torch.as_tensor(x) + + +class ImageList(object): + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size, + and storing in a field the original sizes of each image + + Attributes: + image_sizes (list[tuple[int, int]]): each tuple is (h, w). + During tracing, it becomes list[Tensor] instead. + """ + + def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): + """ + Arguments: + tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 + image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can + be smaller than (H, W) due to padding. + """ + self.tensor = tensor + self.image_sizes = image_sizes + + def __len__(self) -> int: + return len(self.image_sizes) + + def __getitem__(self, idx) -> torch.Tensor: + """ + Access the individual image in its original size. + + Args: + idx: int or slice + + Returns: + Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 + """ + size = self.image_sizes[idx] + return self.tensor[idx, ..., : size[0], : size[1]] + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any) -> "ImageList": + cast_tensor = self.tensor.to(*args, **kwargs) + return ImageList(cast_tensor, self.image_sizes) + + @property + def device(self) -> device: + return self.tensor.device + + @staticmethod + def from_tensors( + tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 + ) -> "ImageList": + """ + Args: + tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or + (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded + to the same shape with `pad_value`. + size_divisibility (int): If `size_divisibility > 0`, add padding to ensure + the common height and width is divisible by `size_divisibility`. 
+ This depends on the model and many models need a divisibility of 32. + pad_value (float): value to pad + + Returns: + an `ImageList`. + """ + assert len(tensors) > 0 + assert isinstance(tensors, (tuple, list)) + for t in tensors: + assert isinstance(t, torch.Tensor), type(t) + assert t.shape[:-2] == tensors[0].shape[:-2], t.shape + + image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] + image_sizes_tensor = [_as_tensor(x) for x in image_sizes] + max_size = torch.stack(image_sizes_tensor).max(0).values + + if size_divisibility > 1: + stride = size_divisibility + # the last two dims are H,W, both subject to divisibility requirement + max_size = (max_size + (stride - 1)) // stride * stride + + # handle weirdness of scripting and tracing ... + if torch.jit.is_scripting(): + max_size: List[int] = max_size.to(dtype=torch.long).tolist() + else: + # https://github.com/pytorch/pytorch/issues/42448 + if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing(): + image_sizes = image_sizes_tensor + + if len(tensors) == 1: + # This seems slightly (2%) faster. + # TODO: check whether it's faster for multiple images as well + image_size = image_sizes[0] + padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] + batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) + else: + # max_size can be a tensor in tracing mode, therefore convert to list + batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) + batched_imgs = tensors[0].new_full(batch_shape, pad_value) + for img, pad_img in zip(tensors, batched_imgs): + pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) + + return ImageList(batched_imgs.contiguous(), image_sizes) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/instances.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/instances.py new file mode 100644 index 0000000000000000000000000000000000000000..e6bc832796b1a71dfa3ce6c06735ad02acb7a482 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/instances.py @@ -0,0 +1,191 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +from typing import Any, Dict, List, Tuple, Union +import torch + + +class Instances: + """ + This class represents a list of instances in an image. + It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". + All fields must have the same ``__len__`` which is the number of instances. + + All other (non-field) attributes of this class are considered private: + they must start with '_' and are not modifiable by a user. + + Some basic usage: + + 1. Set/get/check a field: + + .. code-block:: python + + instances.gt_boxes = Boxes(...) + print(instances.pred_masks) # a tensor of shape (N, H, W) + print('gt_masks' in instances) + + 2. ``len(instances)`` returns the number of instances + 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields + and returns a new :class:`Instances`. + Typically, ``indices`` is a integer vector of indices, + or a binary mask of length ``num_instances`` + + .. code-block:: python + + category_3_detections = instances[instances.pred_classes == 3] + confident_detections = instances[instances.scores > 0.9] + """ + + def __init__(self, image_size: Tuple[int, int], **kwargs: Any): + """ + Args: + image_size (height, width): the spatial size of the image. + kwargs: fields to add to this `Instances`. 
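# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the padding performed
# by ImageList.from_tensors above, reproduced with plain torch for two images
# of different sizes and size_divisibility=32. The image shapes are made up.
import torch

imgs = [torch.rand(3, 37, 50), torch.rand(3, 64, 41)]
stride = 32
max_h = max(im.shape[-2] for im in imgs)
max_w = max(im.shape[-1] for im in imgs)
# round the common size up to a multiple of the divisibility requirement
max_h = (max_h + stride - 1) // stride * stride
max_w = (max_w + stride - 1) // stride * stride

batched = torch.zeros((len(imgs), 3, max_h, max_w))
for img, pad_img in zip(imgs, batched):
    # copy each image into the top-left corner of its padded slot
    pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img)
print(batched.shape)  # torch.Size([2, 3, 64, 64])
# ---------------------------------------------------------------------------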
+ """ + self._image_size = image_size + self._fields: Dict[str, Any] = {} + for k, v in kwargs.items(): + self.set(k, v) + + @property + def image_size(self) -> Tuple[int, int]: + """ + Returns: + tuple: height, width + """ + return self._image_size + + def __setattr__(self, name: str, val: Any) -> None: + if name.startswith("_"): + super().__setattr__(name, val) + else: + self.set(name, val) + + def __getattr__(self, name: str) -> Any: + if name == "_fields" or name not in self._fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self._fields[name] + + def set(self, name: str, value: Any) -> None: + """ + Set the field named `name` to `value`. + The length of `value` must be the number of instances, + and must agree with other existing fields in this object. + """ + data_len = len(value) + if len(self._fields): + assert ( + len(self) == data_len + ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) + self._fields[name] = value + + def has(self, name: str) -> bool: + """ + Returns: + bool: whether the field called `name` exists. + """ + return name in self._fields + + def remove(self, name: str) -> None: + """ + Remove the field called `name`. + """ + del self._fields[name] + + def get(self, name: str) -> Any: + """ + Returns the field called `name`. + """ + return self._fields[name] + + def get_fields(self) -> Dict[str, Any]: + """ + Returns: + dict: a dict which maps names (str) to data of the fields + + Modifying the returned dict will modify this instance. + """ + return self._fields + + # Tensor-like methods + def to(self, *args: Any, **kwargs: Any) -> "Instances": + """ + Returns: + Instances: all fields are called with a `to(device)`, if the field has this method. + """ + ret = Instances(self._image_size) + for k, v in self._fields.items(): + if hasattr(v, "to"): + v = v.to(*args, **kwargs) + ret.set(k, v) + return ret + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": + """ + Args: + item: an index-like object and will be used to index all the fields. + + Returns: + If `item` is a string, return the data in the corresponding field. + Otherwise, returns an `Instances` where all fields are indexed by `item`. 
+ """ + if type(item) == int: + if item >= len(self) or item < -len(self): + raise IndexError("Instances index out of range!") + else: + item = slice(item, None, len(self)) + + ret = Instances(self._image_size) + for k, v in self._fields.items(): + ret.set(k, v[item]) + return ret + + def __len__(self) -> int: + for v in self._fields.values(): + # use __len__ because len() has to be int and is not friendly to tracing + return v.__len__() + raise NotImplementedError("Empty Instances does not support __len__!") + + def __iter__(self): + raise NotImplementedError("`Instances` object is not iterable!") + + @staticmethod + def cat(instance_lists: List["Instances"]) -> "Instances": + """ + Args: + instance_lists (list[Instances]) + + Returns: + Instances + """ + assert all(isinstance(i, Instances) for i in instance_lists) + assert len(instance_lists) > 0 + if len(instance_lists) == 1: + return instance_lists[0] + + image_size = instance_lists[0].image_size + for i in instance_lists[1:]: + assert i.image_size == image_size + ret = Instances(image_size) + for k in instance_lists[0]._fields.keys(): + values = [i.get(k) for i in instance_lists] + v0 = values[0] + if isinstance(v0, torch.Tensor): + values = torch.cat(values, dim=0) + elif isinstance(v0, list): + values = list(itertools.chain(*values)) + elif hasattr(type(v0), "cat"): + values = type(v0).cat(values) + else: + raise ValueError("Unsupported type {} for concatenation".format(type(v0))) + ret.set(k, values) + return ret + + def __str__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={}, ".format(len(self)) + s += "image_height={}, ".format(self._image_size[0]) + s += "image_width={}, ".format(self._image_size[1]) + s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) + return s + + __repr__ = __str__ diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/keypoints.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/keypoints.py new file mode 100644 index 0000000000000000000000000000000000000000..3d956a2d57e30be18ccef1fd3cf201d5ba3d8ab4 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/keypoints.py @@ -0,0 +1,230 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Any, List, Tuple, Union +import torch +from torch.nn import functional as F + +from detectron2.utils.env import TORCH_VERSION + +if TORCH_VERSION < (1, 8): + + def script_if_tracing(fn): + return fn + + +else: + script_if_tracing = torch.jit.script_if_tracing + + +class Keypoints: + """ + Stores keypoint **annotation** data. GT Instances have a `gt_keypoints` property + containing the x,y location and visibility flag of each keypoint. This tensor has shape + (N, K, 3) where N is the number of instances and K is the number of keypoints per instance. + + The visibility flag follows the COCO format and must be one of three integers: + + * v=0: not labeled (in which case x=y=0) + * v=1: labeled but not visible + * v=2: labeled and visible + """ + + def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]): + """ + Arguments: + keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint. + The shape should be (N, K, 3) where N is the number of + instances, and K is the number of keypoints per instance. 
+ """ + device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu") + keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device) + assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape + self.tensor = keypoints + + def __len__(self) -> int: + return self.tensor.size(0) + + def to(self, *args: Any, **kwargs: Any) -> "Keypoints": + return type(self)(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor: + """ + Convert keypoint annotations to a heatmap of one-hot labels for training, + as described in :paper:`Mask R-CNN`. + + Arguments: + boxes: Nx4 tensor, the boxes to draw the keypoints to + + Returns: + heatmaps: + A tensor of shape (N, K), each element is integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. + valid: + A tensor of shape (N, K) containing whether each keypoint is in the roi or not. + """ + return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size) + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints": + """ + Create a new `Keypoints` by indexing on this `Keypoints`. + + The following usage are allowed: + + 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance. + 2. `new_kpts = kpts[2:10]`: return a slice of key points. + 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor + with `length = len(kpts)`. Nonzero elements in the vector will be selected. + + Note that the returned Keypoints might share storage with this Keypoints, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Keypoints([self.tensor[item]]) + return Keypoints(self.tensor[item]) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + +# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop) +def _keypoints_to_heatmap( + keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space. + + Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the + closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the + continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"): + d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. + + Arguments: + keypoints: tensor of keypoint locations in of shape (N, K, 3). + rois: Nx4 tensor of rois in xyxy format + heatmap_size: integer side length of square heatmap. + + Returns: + heatmaps: A tensor of shape (N, K) containing an integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. + valid: A tensor of shape (N, K) containing whether each keypoint is in + the roi or not. 
+ """ + + if rois.numel() == 0: + return rois.new().long(), rois.new().long() + offset_x = rois[:, 0] + offset_y = rois[:, 1] + scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) + scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) + + offset_x = offset_x[:, None] + offset_y = offset_y[:, None] + scale_x = scale_x[:, None] + scale_y = scale_y[:, None] + + x = keypoints[..., 0] + y = keypoints[..., 1] + + x_boundary_inds = x == rois[:, 2][:, None] + y_boundary_inds = y == rois[:, 3][:, None] + + x = (x - offset_x) * scale_x + x = x.floor().long() + y = (y - offset_y) * scale_y + y = y.floor().long() + + x[x_boundary_inds] = heatmap_size - 1 + y[y_boundary_inds] = heatmap_size - 1 + + valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) + vis = keypoints[..., 2] > 0 + valid = (valid_loc & vis).long() + + lin_ind = y * heatmap_size + x + heatmaps = lin_ind * valid + + return heatmaps, valid + + +@script_if_tracing +def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: + """ + Extract predicted keypoint locations from heatmaps. + + Args: + maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for + each ROI and each keypoint. + rois (Tensor): (#ROIs, 4). The box of each ROI. + + Returns: + Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to + (x, y, logit, score) for each keypoint. + + When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, + we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from + Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. + """ + # The decorator use of torch.no_grad() was not supported by torchscript. + # https://github.com/pytorch/pytorch/issues/44768 + maps = maps.detach() + rois = rois.detach() + + offset_x = rois[:, 0] + offset_y = rois[:, 1] + + widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) + heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) + widths_ceil = widths.ceil() + heights_ceil = heights.ceil() + + num_rois, num_keypoints = maps.shape[:2] + xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) + + width_corrections = widths / widths_ceil + height_corrections = heights / heights_ceil + + keypoints_idx = torch.arange(num_keypoints, device=maps.device) + + for i in range(num_rois): + outsize = (int(heights_ceil[i]), int(widths_ceil[i])) + roi_map = F.interpolate( + maps[[i]], size=outsize, mode="bicubic", align_corners=False + ).squeeze( + 0 + ) # #keypoints x H x W + + # softmax over the spatial region + max_score, _ = roi_map.view(num_keypoints, -1).max(1) + max_score = max_score.view(num_keypoints, 1, 1) + tmp_full_resolution = (roi_map - max_score).exp_() + tmp_pool_resolution = (maps[i] - max_score).exp_() + # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, + # so that the scores of objects of different absolute sizes will be more comparable + roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True) + + w = roi_map.shape[2] + pos = roi_map.view(num_keypoints, -1).argmax(1) + + x_int = pos % w + y_int = (pos - x_int) // w + + assert ( + roi_map_scores[keypoints_idx, y_int, x_int] + == roi_map_scores.view(num_keypoints, -1).max(1)[0] + ).all() + + x = (x_int.float() + 0.5) * width_corrections[i] + y = (y_int.float() + 0.5) * height_corrections[i] + + xy_preds[i, :, 0] = x + offset_x[i] + xy_preds[i, :, 1] = y + offset_y[i] + xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, 
x_int] + xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] + + return xy_preds diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/masks.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/masks.py new file mode 100644 index 0000000000000000000000000000000000000000..b5d2abdbff9e8200680f1ebb90c1af0bf533c323 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/masks.py @@ -0,0 +1,441 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import numpy as np +from typing import Any, Iterator, List, Union +import pycocotools.mask as mask_util +import torch + +from detectron2.layers.roi_align import ROIAlign + +from .boxes import Boxes + + +def polygon_area(x, y): + # Using the shoelace formula + # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) + + +def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: + """ + Args: + polygons (list[ndarray]): each array has shape (Nx2,) + height, width (int) + + Returns: + ndarray: a bool mask of shape (height, width) + """ + assert len(polygons) > 0, "COCOAPI does not support empty polygons" + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + return mask_util.decode(rle).astype(np.bool) + + +def rasterize_polygons_within_box( + polygons: List[np.ndarray], box: np.ndarray, mask_size: int +) -> torch.Tensor: + """ + Rasterize the polygons into a mask image and + crop the mask content in the given box. + The cropped mask is resized to (mask_size, mask_size). + + This function is used when generating training targets for mask head in Mask R-CNN. + Given original ground-truth masks for an image, new ground-truth mask + training targets in the size of `mask_size x mask_size` + must be provided for each predicted box. This function will be called to + produce such targets. + + Args: + polygons (list[ndarray[float]]): a list of polygons, which represents an instance. + box: 4-element numpy array + mask_size (int): + + Returns: + Tensor: BoolTensor of shape (mask_size, mask_size) + """ + # 1. Shift the polygons w.r.t the boxes + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(polygons) + for p in polygons: + p[0::2] = p[0::2] - box[0] + p[1::2] = p[1::2] - box[1] + + # 2. Rescale the polygons to the new box size + # max() to avoid division by small number + ratio_h = mask_size / max(h, 0.1) + ratio_w = mask_size / max(w, 0.1) + + if ratio_h == ratio_w: + for p in polygons: + p *= ratio_h + else: + for p in polygons: + p[0::2] *= ratio_w + p[1::2] *= ratio_h + + # 3. Rasterize the polygons with coco api + mask = polygons_to_bitmask(polygons, mask_size, mask_size) + mask = torch.from_numpy(mask) + return mask + + +class BitMasks: + """ + This class stores the segmentation masks for all objects in one image, in + the form of bitmaps. + + Attributes: + tensor: bool Tensor of N,H,W, representing N instances in the image. + """ + + def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): + """ + Args: + tensor: bool Tensor of N,H,W, representing N instances in the image. 
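# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the keypoint ->
# heatmap-cell encoding used by _keypoints_to_heatmap above, for a single ROI
# and a single keypoint, using the Heckbert continuous-to-discrete rule
# d = floor(c). The ROI, keypoint, and heatmap size below are made up.
import torch

roi = torch.tensor([10.0, 20.0, 110.0, 70.0])      # (x1, y1, x2, y2)
kpt = torch.tensor([60.0, 45.0, 2.0])              # (x, y, visibility)
heatmap_size = 56

scale_x = heatmap_size / (roi[2] - roi[0])
scale_y = heatmap_size / (roi[3] - roi[1])
x = ((kpt[0] - roi[0]) * scale_x).floor().long()   # discrete column
y = ((kpt[1] - roi[1]) * scale_y).floor().long()   # discrete row
lin_ind = y * heatmap_size + x                     # spatial label in [0, heatmap_size**2 - 1]
print(int(x), int(y), int(lin_ind))                # 28 28 1596
# ---------------------------------------------------------------------------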
+ """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) + assert tensor.dim() == 3, tensor.size() + self.image_size = tensor.shape[1:] + self.tensor = tensor + + def to(self, *args: Any, **kwargs: Any) -> "BitMasks": + return BitMasks(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": + """ + Returns: + BitMasks: Create a new :class:`BitMasks` by indexing. + + The following usage are allowed: + + 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. + 2. `new_masks = masks[2:10]`: return a slice of masks. + 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor + with `length = len(masks)`. Nonzero elements in the vector will be selected. + + Note that the returned object might share storage with this object, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return BitMasks(self.tensor[item].view(1, -1)) + m = self.tensor[item] + assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( + item, m.shape + ) + return BitMasks(m) + + def __iter__(self) -> torch.Tensor: + yield from self.tensor + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + def __len__(self) -> int: + return self.tensor.shape[0] + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: a BoolTensor which represents + whether each mask is empty (False) or non-empty (True). + """ + return self.tensor.flatten(1).any(dim=1) + + @staticmethod + def from_polygon_masks( + polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int + ) -> "BitMasks": + """ + Args: + polygon_masks (list[list[ndarray]] or PolygonMasks) + height, width (int) + """ + if isinstance(polygon_masks, PolygonMasks): + polygon_masks = polygon_masks.polygons + masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] + return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each bitmask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + It has less reconstruction error compared to rasterization with polygons. + However we observe no difference in accuracy, + but BitMasks requires more memory to store all the masks. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: + A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. + """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + device = self.tensor.device + + batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] + rois = torch.cat([batch_inds, boxes], dim=1) # Nx5 + + bit_masks = self.tensor.to(dtype=torch.float32) + rois = rois.to(device=device) + output = ( + ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) + .forward(bit_masks[:, None, :, :], rois) + .squeeze(1) + ) + output = output >= 0.5 + return output + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around bitmasks. 
+ If a mask is empty, it's bounding box will be all zero. + """ + boxes = torch.zeros(self.tensor.shape[0], 4, dtype=torch.float32) + x_any = torch.any(self.tensor, dim=1) + y_any = torch.any(self.tensor, dim=2) + for idx in range(self.tensor.shape[0]): + x = torch.where(x_any[idx, :])[0] + y = torch.where(y_any[idx, :])[0] + if len(x) > 0 and len(y) > 0: + boxes[idx, :] = torch.as_tensor( + [x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=torch.float32 + ) + return Boxes(boxes) + + @staticmethod + def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": + """ + Concatenates a list of BitMasks into a single BitMasks + + Arguments: + bitmasks_list (list[BitMasks]) + + Returns: + BitMasks: the concatenated BitMasks + """ + assert isinstance(bitmasks_list, (list, tuple)) + assert len(bitmasks_list) > 0 + assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) + + cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) + return cat_bitmasks + + +class PolygonMasks: + """ + This class stores the segmentation masks for all objects in one image, in the form of polygons. + + Attributes: + polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. + """ + + def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): + """ + Arguments: + polygons (list[list[np.ndarray]]): The first + level of the list correspond to individual instances, + the second level to all the polygons that compose the + instance, and the third level to the polygon coordinates. + The third level array should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + """ + if not isinstance(polygons, list): + raise ValueError( + "Cannot create PolygonMasks: Expect a list of list of polygons per image. " + "Got '{}' instead.".format(type(polygons)) + ) + + def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: + # Use float64 for higher precision, because why not? + # Always put polygons on CPU (self.to is a no-op) since they + # are supposed to be small tensors. + # May need to change this assumption if GPU placement becomes useful + if isinstance(t, torch.Tensor): + t = t.cpu().numpy() + return np.asarray(t).astype("float64") + + def process_polygons( + polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] + ) -> List[np.ndarray]: + if not isinstance(polygons_per_instance, list): + raise ValueError( + "Cannot create polygons: Expect a list of polygons per instance. " + "Got '{}' instead.".format(type(polygons_per_instance)) + ) + # transform each polygon to a numpy array + polygons_per_instance = [_make_array(p) for p in polygons_per_instance] + for polygon in polygons_per_instance: + if len(polygon) % 2 != 0 or len(polygon) < 6: + raise ValueError(f"Cannot create a polygon from {len(polygon)} coordinates.") + return polygons_per_instance + + self.polygons: List[List[np.ndarray]] = [ + process_polygons(polygons_per_instance) for polygons_per_instance in polygons + ] + + def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": + return self + + @property + def device(self) -> torch.device: + return torch.device("cpu") + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around polygon masks. 
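# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): deriving a tight xyxy
# box from a boolean mask with the same torch.any / torch.where pattern used
# by BitMasks.get_bounding_boxes above. The mask contents are made up.
import torch

mask = torch.zeros(1, 8, 8, dtype=torch.bool)
mask[0, 2:5, 3:7] = True                       # a 3x4 blob of foreground pixels

x_any = torch.any(mask, dim=1)                 # (N, W): columns containing any True
y_any = torch.any(mask, dim=2)                 # (N, H): rows containing any True
x = torch.where(x_any[0])[0]
y = torch.where(y_any[0])[0]
box = torch.tensor([float(x[0]), float(y[0]), float(x[-1] + 1), float(y[-1] + 1)])
print(box)                                     # tensor([3., 2., 7., 5.])
# ---------------------------------------------------------------------------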
+ """ + boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) + for idx, polygons_per_instance in enumerate(self.polygons): + minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) + maxxy = torch.zeros(2, dtype=torch.float32) + for polygon in polygons_per_instance: + coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) + minxy = torch.min(minxy, torch.min(coords, dim=0).values) + maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) + boxes[idx, :2] = minxy + boxes[idx, 2:] = maxxy + return Boxes(boxes) + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: + a BoolTensor which represents whether each mask is empty (False) or not (True). + """ + keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] + return torch.from_numpy(np.asarray(keep, dtype=np.bool)) + + def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": + """ + Support indexing over the instances and return a `PolygonMasks` object. + `item` can be: + + 1. An integer. It will return an object with only one instance. + 2. A slice. It will return an object with the selected instances. + 3. A list[int]. It will return an object with the selected instances, + correpsonding to the indices in the list. + 4. A vector mask of type BoolTensor, whose length is num_instances. + It will return an object with the instances whose mask is nonzero. + """ + if isinstance(item, int): + selected_polygons = [self.polygons[item]] + elif isinstance(item, slice): + selected_polygons = self.polygons[item] + elif isinstance(item, list): + selected_polygons = [self.polygons[i] for i in item] + elif isinstance(item, torch.Tensor): + # Polygons is a list, so we have to move the indices back to CPU. + if item.dtype == torch.bool: + assert item.dim() == 1, item.shape + item = torch.nonzero(item, as_tuple=False).squeeze(1).cpu().numpy().tolist() + elif item.dtype in [torch.int32, torch.int64]: + item = item.cpu().numpy().tolist() + else: + raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) + selected_polygons = [self.polygons[i] for i in item] + return PolygonMasks(selected_polygons) + + def __iter__(self) -> Iterator[List[np.ndarray]]: + """ + Yields: + list[ndarray]: the polygons for one instance. + Each Tensor is a float64 vector representing a polygon. + """ + return iter(self.polygons) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.polygons)) + return s + + def __len__(self) -> int: + return len(self.polygons) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each mask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. 
+ """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + + device = boxes.device + # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise + # (several small tensors for representing a single instance mask) + boxes = boxes.to(torch.device("cpu")) + + results = [ + rasterize_polygons_within_box(poly, box.numpy(), mask_size) + for poly, box in zip(self.polygons, boxes) + ] + """ + poly: list[list[float]], the polygons for one instance + box: a tensor of shape (4,) + """ + if len(results) == 0: + return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) + return torch.stack(results, dim=0).to(device=device) + + def area(self): + """ + Computes area of the mask. + Only works with Polygons, using the shoelace formula: + https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + + Returns: + Tensor: a vector, area for each instance + """ + + area = [] + for polygons_per_instance in self.polygons: + area_per_instance = 0 + for p in polygons_per_instance: + area_per_instance += polygon_area(p[0::2], p[1::2]) + area.append(area_per_instance) + + return torch.tensor(area) + + @staticmethod + def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": + """ + Concatenates a list of PolygonMasks into a single PolygonMasks + + Arguments: + polymasks_list (list[PolygonMasks]) + + Returns: + PolygonMasks: the concatenated PolygonMasks + """ + assert isinstance(polymasks_list, (list, tuple)) + assert len(polymasks_list) > 0 + assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) + + cat_polymasks = type(polymasks_list[0])( + list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) + ) + return cat_polymasks diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/rotated_boxes.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..8f48b40560f2f409b20d87bb1ff448bf44e090d2 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/structures/rotated_boxes.py @@ -0,0 +1,505 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List, Tuple +import torch + +from detectron2.layers.rotated_boxes import pairwise_iou_rotated + +from .boxes import Boxes, _maybe_jit_unused + + +class RotatedBoxes(Boxes): + """ + This structure stores a list of rotated boxes as a Nx5 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx5 matrix. Each row is + (x_center, y_center, width, height, angle), + in which angle is represented in degrees. + While there's no strict range restriction for it, + the recommended principal range is between [-180, 180) degrees. + + Assume we have a horizontal box B = (x_center, y_center, width, height), + where width is along the x-axis and height is along the y-axis. + The rotated box B_rot (x_center, y_center, width, height, angle) + can be seen as: + + 1. When angle == 0: + B_rot == B + 2. When angle > 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW; + 3. When angle < 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW. 
+ + Mathematically, since the right-handed coordinate system for image space + is (y, x), where y is top->down and x is left->right, the 4 vertices of the + rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from + the vertices of the horizontal rectangle :math:`(y_i, x_i)` (i = 1, 2, 3, 4) + in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians, + :math:`(y_c, x_c)` is the center of the rectangle): + + .. math:: + + yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c, + + xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c, + + which is the standard rigid-body rotation transformation. + + Intuitively, the angle is + (1) the rotation angle from y-axis in image space + to the height vector (top->down in the box's local coordinate system) + of the box in CCW, and + (2) the rotation angle from x-axis in image space + to the width vector (left->right in the box's local coordinate system) + of the box in CCW. + + More intuitively, consider the following horizontal box ABCD represented + in (x1, y1, x2, y2): (3, 2, 7, 4), + covering the [3, 7] x [2, 4] region of the continuous coordinate system + which looks like this: + + .. code:: none + + O--------> x + | + | A---B + | | | + | D---C + | + v y + + Note that each capital letter represents one 0-dimensional geometric point + instead of a 'square pixel' here. + + In the example above, using (x, y) to represent a point we have: + + .. math:: + + O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4) + + We name vector AB = vector DC as the width vector in box's local coordinate system, and + vector AD = vector BC as the height vector in box's local coordinate system. Initially, + when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis + in the image space, respectively. + + For better illustration, we denote the center of the box as E, + + .. code:: none + + O--------> x + | + | A---B + | | E | + | D---C + | + v y + + where the center E = ((3+7)/2, (2+4)/2) = (5, 3). + + Also, + + .. math:: + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Therefore, the corresponding representation for the same shape in rotated box in + (x_center, y_center, width, height, angle) format is: + + (5, 3, 4, 2, 0), + + Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees + CCW (counter-clockwise) by definition. It looks like this: + + .. code:: none + + O--------> x + | B-C + | | | + | |E| + | | | + | A-D + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CCW with regard to E: + A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5) + + Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to + vector AD or vector BC (the top->down height vector in box's local coordinate system), + or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right + width vector in box's local coordinate system). + + .. math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise) + by definition? It looks like this: + + .. code:: none + + O--------> x + | D-A + | | | + | |E| + | | | + | C-B + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CW with regard to E: + A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1) + + .. 
math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU + will be 1. However, these two will generate different RoI Pooling results and + should not be treated as an identical box. + + On the other hand, it's easy to see that (X, Y, W, H, A) is identical to + (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be + identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is + equivalent to rotating the same shape 90 degrees CW. + + We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180): + + .. code:: none + + O--------> x + | + | C---D + | | E | + | B---A + | + v y + + .. math:: + + A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2), + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Finally, this is a very inaccurate (heavily quantized) illustration of + how (5, 3, 4, 2, 60) looks like in case anyone wonders: + + .. code:: none + + O--------> x + | B\ + | / C + | /E / + | A / + | `D + v y + + It's still a rectangle with center of (5, 3), width of 4 and height of 2, + but its angle (and thus orientation) is somewhere between + (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90). + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device) + assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size() + + self.tensor = tensor + + def clone(self) -> "RotatedBoxes": + """ + Clone the RotatedBoxes. + + Returns: + RotatedBoxes + """ + return RotatedBoxes(self.tensor.clone()) + + @_maybe_jit_unused + def to(self, device: torch.device): + # Boxes are assumed float32 and does not support to(dtype) + return RotatedBoxes(self.tensor.to(device=device)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. + + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = box[:, 2] * box[:, 3] + return area + + def normalize_angles(self) -> None: + """ + Restrict angles to the range of [-180, 180) degrees + """ + self.tensor[:, 4] = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0 + + def clip(self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + For RRPN: + Only clip boxes that are almost horizontal with a tolerance of + clip_angle_threshold to maintain backward compatibility. + + Rotated boxes beyond this threshold are not clipped for two reasons: + + 1. There are potentially multiple ways to clip a rotated box to make it + fit within the image. + 2. It's tricky to make the entire rectangular box fit within the image + and still be able to not leave out pixels of interest. + + Therefore we rely on ops like RoIAlignRotated to safely handle this. + + Args: + box_size (height, width): The clipping box's size. + clip_angle_threshold: + Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees), + we do the clipping as horizontal boxes. 
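# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): computing the four
# vertices of a box in (xc, yc, w, h, angle) format under the image-space
# convention described above (y points down, positive angle rotates CCW),
# and checking the worked example (5, 3, 4, 2, 90) -> A(4,5) B(4,1) C(6,1) D(6,5).
import math

def rotated_box_corners(xc, yc, w, h, angle_deg):
    theta = math.radians(angle_deg)
    c, s = math.cos(theta), math.sin(theta)
    corners = []
    # local offsets of A, B, C, D before rotation (x to the right, y down)
    for dx, dy in [(-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2), (-w / 2, h / 2)]:
        # CCW rotation in image space (equivalently CW in a y-up frame)
        corners.append((xc + c * dx + s * dy, yc - s * dx + c * dy))
    return corners

corners = rotated_box_corners(5, 3, 4, 2, 90)
expected = [(4.0, 5.0), (4.0, 1.0), (6.0, 1.0), (6.0, 5.0)]
assert all(
    math.isclose(a, b, abs_tol=1e-9)
    for got, exp in zip(corners, expected)
    for a, b in zip(got, exp)
)
print(corners)
# ---------------------------------------------------------------------------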
+ """ + h, w = box_size + + # normalize angles to be within (-180, 180] degrees + self.normalize_angles() + + idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0] + + # convert to (x1, y1, x2, y2) + x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0 + y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0 + x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0 + y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0 + + # clip + x1.clamp_(min=0, max=w) + y1.clamp_(min=0, max=h) + x2.clamp_(min=0, max=w) + y2.clamp_(min=0, max=h) + + # convert back to (xc, yc, w, h) + self.tensor[idx, 0] = (x1 + x2) / 2.0 + self.tensor[idx, 1] = (y1 + y2) / 2.0 + # make sure widths and heights do not increase due to numerical errors + self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1) + self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: a binary vector which represents + whether each box is empty (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] + heights = box[:, 3] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item) -> "RotatedBoxes": + """ + Returns: + RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned RotatedBoxes might share storage with this RotatedBoxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return RotatedBoxes(self.tensor[item].view(1, -1)) + b = self.tensor[item] + assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format( + item + ) + return RotatedBoxes(b) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "RotatedBoxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box covering + [0, width] x [0, height] + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + For RRPN, it might not be necessary to call this function since it's common + for rotated box to extend to outside of the image boundaries + (the clip function only clips the near-horizontal boxes) + + Returns: + a binary vector, indicating whether each box is inside the reference box. 
+ """ + height, width = box_size + + cnt_x = self.tensor[..., 0] + cnt_y = self.tensor[..., 1] + half_w = self.tensor[..., 2] / 2.0 + half_h = self.tensor[..., 3] / 2.0 + a = self.tensor[..., 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + max_rect_dx = c * half_w + s * half_h + max_rect_dy = c * half_h + s * half_w + + inds_inside = ( + (cnt_x - max_rect_dx >= -boundary_threshold) + & (cnt_y - max_rect_dy >= -boundary_threshold) + & (cnt_x + max_rect_dx < width + boundary_threshold) + & (cnt_y + max_rect_dy < height + boundary_threshold) + ) + + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). + """ + return self.tensor[:, :2] + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the rotated box with horizontal and vertical scaling factors + Note: when scale_factor_x != scale_factor_y, + the rotated box does not preserve the rectangular shape when the angle + is not a multiple of 90 degrees under resize transformation. + Instead, the shape is a parallelogram (that has skew) + Here we make an approximation by fitting a rotated rectangle to the parallelogram. + """ + self.tensor[:, 0] *= scale_x + self.tensor[:, 1] *= scale_y + theta = self.tensor[:, 4] * math.pi / 180.0 + c = torch.cos(theta) + s = torch.sin(theta) + + # In image space, y is top->down and x is left->right + # Consider the local coordintate system for the rotated box, + # where the box center is located at (0, 0), and the four vertices ABCD are + # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2) + # the midpoint of the left edge AD of the rotated box E is: + # E = (A+D)/2 = (-w / 2, 0) + # the midpoint of the top edge AB of the rotated box F is: + # F(0, -h / 2) + # To get the old coordinates in the global system, apply the rotation transformation + # (Note: the right-handed coordinate system for image space is yOx): + # (old_x, old_y) = (s * y + c * x, c * y - s * x) + # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2) + # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2) + # After applying the scaling factor (sfx, sfy): + # E(new) = (-sfx * c * w / 2, sfy * s * w / 2) + # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2) + # The new width after scaling tranformation becomes: + + # w(new) = |E(new) - O| * 2 + # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2 + # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w + # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y + self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2) + + # h(new) = |F(new) - O| * 2 + # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2 + # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h + # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x + self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2) + + # The angle is the rotation angle from y-axis in image space to the height + # vector (top->down in the box's local coordinate system) of the box in CCW. 
+ # + # angle(new) = angle_yOx(O - F(new)) + # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) ) + # = atan2(sfx * s * h / 2, sfy * c * h / 2) + # = atan2(sfx * s, sfy * c) + # + # For example, + # when sfx == sfy, angle(new) == atan2(s, c) == angle(old) + self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi + + @classmethod + @_maybe_jit_unused + def cat(cls, boxes_list: List["RotatedBoxes"]) -> "RotatedBoxes": + """ + Concatenates a list of RotatedBoxes into a single RotatedBoxes + + Arguments: + boxes_list (list[RotatedBoxes]) + + Returns: + RotatedBoxes: the concatenated RotatedBoxes + """ + assert isinstance(boxes_list, (list, tuple)) + if len(boxes_list) == 0: + return cls(torch.empty(0)) + assert all([isinstance(box, RotatedBoxes) for box in boxes_list]) + + # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input + cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) + return cat_boxes + + @property + def device(self) -> torch.device: + return self.tensor.device + + @torch.jit.unused + def __iter__(self): + """ + Yield a box as a Tensor of shape (5,) at a time. + """ + yield from self.tensor + + +def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> None: + """ + Given two lists of rotated boxes of size N and M, + compute the IoU (intersection over union) + between **all** N x M pairs of boxes. + The box order must be (x_center, y_center, width, height, angle). + + Args: + boxes1, boxes2 (RotatedBoxes): + two `RotatedBoxes`. Contains N & M rotated boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + + return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/__init__.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9020c2df23e2af280b7bb168b996ae9eaf312eb8 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/analysis.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..51b453cabb2f369e470296d468955432446de1a5 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/analysis.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
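+#
+# Illustrative usage sketch (not part of upstream detectron2): the helpers in
+# this module wrap fvcore's flop/activation counters for detectron2 models.
+# `model` and `image` below are hypothetical placeholders.
+#
+#   inputs = [{"image": image}]                               # detectron2-style input
+#   gflops_per_op = flop_count_operators(model, inputs)       # dict: op -> GFlops
+#   acts_per_op = activation_count_operators(model, inputs)   # dict: op -> activation count
+#   total_flops = FlopCountAnalysis(model, inputs).total()    # overall flop count
+#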
+# -*- coding: utf-8 -*- + +import typing +import fvcore +from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table +from torch import nn + +from detectron2.export import TracingAdapter + +__all__ = [ + "activation_count_operators", + "flop_count_operators", + "parameter_count_table", + "parameter_count", +] + +FLOPS_MODE = "flops" +ACTIVATIONS_MODE = "activations" + + +# Some extra ops to ignore from counting, including elementwise and reduction ops +_IGNORED_OPS = { + "aten::add", + "aten::add_", + "aten::argmax", + "aten::argsort", + "aten::batch_norm", + "aten::constant_pad_nd", + "aten::div", + "aten::div_", + "aten::exp", + "aten::log2", + "aten::max_pool2d", + "aten::meshgrid", + "aten::mul", + "aten::mul_", + "aten::neg", + "aten::nonzero_numpy", + "aten::reciprocal", + "aten::rsub", + "aten::sigmoid", + "aten::sigmoid_", + "aten::softmax", + "aten::sort", + "aten::sqrt", + "aten::sub", + "torchvision::nms", # TODO estimate flop for nms +} + + +class FlopCountAnalysis(fvcore.nn.FlopCountAnalysis): + """ + Same as :class:`fvcore.nn.FlopCountAnalysis`, but supports detectron2 models. + """ + + def __init__(self, model, inputs): + """ + Args: + model (nn.Module): + inputs (Any): inputs of the given model. Does not have to be tuple of tensors. + """ + wrapper = TracingAdapter(model, inputs, allow_non_tensor=True) + super().__init__(wrapper, wrapper.flattened_inputs) + self.set_op_handle(**{k: None for k in _IGNORED_OPS}) + + +def flop_count_operators(model: nn.Module, inputs: list) -> typing.DefaultDict[str, float]: + """ + Implement operator-level flops counting using jit. + This is a wrapper of :func:`fvcore.nn.flop_count` and adds supports for standard + detection models in detectron2. + Please use :class:`FlopCountAnalysis` for more advanced functionalities. + + Note: + The function runs the input through the model to compute flops. + The flops of a detection model is often input-dependent, for example, + the flops of box & mask head depends on the number of proposals & + the number of detected objects. + Therefore, the flops counting using a single input may not accurately + reflect the computation cost of a model. It's recommended to average + across a number of inputs. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. + Only "image" key will be used. + supported_ops (dict[str, Handle]): see documentation of :func:`fvcore.nn.flop_count` + + Returns: + Counter: Gflop count per operator + """ + old_train = model.training + model.eval() + ret = FlopCountAnalysis(model, inputs).by_operator() + model.train(old_train) + return {k: v / 1e9 for k, v in ret.items()} + + +def activation_count_operators( + model: nn.Module, inputs: list, **kwargs +) -> typing.DefaultDict[str, float]: + """ + Implement operator-level activations counting using jit. + This is a wrapper of fvcore.nn.activation_count, that supports standard detection models + in detectron2. + + Note: + The function runs the input through the model to compute activations. + The activations of a detection model is often input-dependent, for example, + the activations of box & mask head depends on the number of proposals & + the number of detected objects. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. + Only "image" key will be used. 
+ + Returns: + Counter: activation count per operator + """ + return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) + + +def _wrapper_count_operators( + model: nn.Module, inputs: list, mode: str, **kwargs +) -> typing.DefaultDict[str, float]: + # ignore some ops + supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} + supported_ops.update(kwargs.pop("supported_ops", {})) + kwargs["supported_ops"] = supported_ops + + assert len(inputs) == 1, "Please use batch size=1" + tensor_input = inputs[0]["image"] + inputs = [{"image": tensor_input}] # remove other keys, in case there are any + + old_train = model.training + if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): + model = model.module + wrapper = TracingAdapter(model, inputs) + wrapper.eval() + if mode == FLOPS_MODE: + ret = flop_count(wrapper, (tensor_input,), **kwargs) + elif mode == ACTIVATIONS_MODE: + ret = activation_count(wrapper, (tensor_input,), **kwargs) + else: + raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) + # compatible with change in fvcore + if isinstance(ret, tuple): + ret = ret[0] + model.train(old_train) + return ret diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/collect_env.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/collect_env.py new file mode 100644 index 0000000000000000000000000000000000000000..fc079504f6309ce1e4a276e35c5526d8cd14eb3f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/collect_env.py @@ -0,0 +1,209 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import importlib +import numpy as np +import os +import re +import subprocess +import sys +from collections import defaultdict +import PIL +import torch +import torchvision +from tabulate import tabulate + +__all__ = ["collect_env_info"] + + +def collect_torch_env(): + try: + import torch.__config__ + + return torch.__config__.show() + except ImportError: + # compatible with older versions of pytorch + from torch.utils.collect_env import get_pretty_env_info + + return get_pretty_env_info() + + +def get_env_module(): + var_name = "DETECTRON2_ENV_MODULE" + return var_name, os.environ.get(var_name, "") + + +def detect_compute_compatibility(CUDA_HOME, so_file): + try: + cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") + if os.path.isfile(cuobjdump): + output = subprocess.check_output( + "'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True + ) + output = output.decode("utf-8").strip().split("\n") + arch = [] + for line in output: + line = re.findall(r"\.sm_([0-9]*)\.", line)[0] + arch.append(".".join(line)) + arch = sorted(set(arch)) + return ", ".join(arch) + else: + return so_file + "; cannot find cuobjdump" + except Exception: + # unhandled failure + return so_file + + +def collect_env_info(): + has_gpu = torch.cuda.is_available() # true for both CUDA & ROCM + torch_version = torch.__version__ + + # NOTE that CUDA_HOME/ROCM_HOME could be None even when CUDA runtime libs are functional + from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME + + has_rocm = False + if (getattr(torch.version, "hip", None) is not None) and (ROCM_HOME is not None): + has_rocm = True + has_cuda = has_gpu and (not has_rocm) + + data = [] + data.append(("sys.platform", sys.platform)) # check-template.yml depends on it + data.append(("Python", sys.version.replace("\n", ""))) + data.append(("numpy", np.__version__)) + + try: + import detectron2 # noqa + + 
data.append( + ("detectron2", detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__)) + ) + except ImportError: + data.append(("detectron2", "failed to import")) + + try: + import detectron2._C as _C + except ImportError as e: + data.append(("detectron2._C", f"not built correctly: {e}")) + + # print system compilers when extension fails to build + if sys.platform != "win32": # don't know what to do for windows + try: + # this is how torch/utils/cpp_extensions.py choose compiler + cxx = os.environ.get("CXX", "c++") + cxx = subprocess.check_output("'{}' --version".format(cxx), shell=True) + cxx = cxx.decode("utf-8").strip().split("\n")[0] + except subprocess.SubprocessError: + cxx = "Not found" + data.append(("Compiler ($CXX)", cxx)) + + if has_cuda and CUDA_HOME is not None: + try: + nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") + nvcc = subprocess.check_output("'{}' -V".format(nvcc), shell=True) + nvcc = nvcc.decode("utf-8").strip().split("\n")[-1] + except subprocess.SubprocessError: + nvcc = "Not found" + data.append(("CUDA compiler", nvcc)) + if has_cuda and sys.platform != "win32": + try: + so_file = importlib.util.find_spec("detectron2._C").origin + except ImportError: + pass + else: + data.append( + ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, so_file)) + ) + else: + # print compilers that are used to build extension + data.append(("Compiler", _C.get_compiler_version())) + data.append(("CUDA compiler", _C.get_cuda_version())) # cuda or hip + if has_cuda and getattr(_C, "has_cuda", lambda: True)(): + data.append( + ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, _C.__file__)) + ) + + data.append(get_env_module()) + data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__))) + data.append(("PyTorch debug build", torch.version.debug)) + + data.append(("GPU available", has_gpu)) + if has_gpu: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k))) + name = torch.cuda.get_device_name(k) + f" (arch={cap})" + devices[name].append(str(k)) + for name, devids in devices.items(): + data.append(("GPU " + ",".join(devids), name)) + + if has_rocm: + msg = " - invalid!" if not (ROCM_HOME and os.path.isdir(ROCM_HOME)) else "" + data.append(("ROCM_HOME", str(ROCM_HOME) + msg)) + else: + msg = " - invalid!" 
if not (CUDA_HOME and os.path.isdir(CUDA_HOME)) else "" + data.append(("CUDA_HOME", str(CUDA_HOME) + msg)) + + cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) + if cuda_arch_list: + data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) + data.append(("Pillow", PIL.__version__)) + + try: + data.append( + ( + "torchvision", + str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), + ) + ) + if has_cuda: + try: + torchvision_C = importlib.util.find_spec("torchvision._C").origin + msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) + data.append(("torchvision arch flags", msg)) + except ImportError: + data.append(("torchvision._C", "Not found")) + except AttributeError: + data.append(("torchvision", "unknown")) + + try: + import fvcore + + data.append(("fvcore", fvcore.__version__)) + except ImportError: + pass + + try: + import iopath + + data.append(("iopath", iopath.__version__)) + except (ImportError, AttributeError): + pass + + try: + import cv2 + + data.append(("cv2", cv2.__version__)) + except ImportError: + data.append(("cv2", "Not found")) + env_str = tabulate(data) + "\n" + env_str += collect_torch_env() + return env_str + + +if __name__ == "__main__": + try: + from detectron2.utils.collect_env import collect_env_info as f + + print(f()) + except ImportError: + print(collect_env_info()) + + if torch.cuda.is_available(): + for k in range(torch.cuda.device_count()): + device = f"cuda:{k}" + try: + x = torch.tensor([1, 2.0], dtype=torch.float32) + x = x.to(device) + except Exception as e: + print( + f"Unable to copy tensor to device={device}: {e}. " + "Your CUDA environment is broken." + ) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/colormap.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/colormap.py new file mode 100644 index 0000000000000000000000000000000000000000..150ccc372262ec4de0b36db66a303cae9495e67f --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/colormap.py @@ -0,0 +1,140 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +An awesome colormap for really neat visualizations. +Copied from Detectron, and removed gray colors. 
+""" + +import numpy as np + +__all__ = ["colormap", "random_color"] + +# fmt: off +# RGB: +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] +).astype(np.float32).reshape(-1, 3) +# fmt: on + + +def colormap(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] + """ + assert maximum in [255, 1], maximum + c = _COLORS * maximum + if not rgb: + c = c[:, ::-1] + return c + + +def random_color(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a vector of 3 numbers + """ + idx = np.random.randint(0, len(_COLORS)) + ret = _COLORS[idx] * maximum + if not rgb: + ret = ret[::-1] + return ret + + +if __name__ == "__main__": + import cv2 + + size = 100 + H, W = 10, 10 + canvas = np.random.rand(H * size, W * size, 3).astype("float32") + for h in range(H): + for w in range(W): + idx = h * W + w + if idx >= len(_COLORS): + break + canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] + cv2.imshow("a", canvas) + cv2.waitKey(0) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/comm.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..9b199a176c4fcecab155674c52fa7dce2740315c --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/comm.py @@ -0,0 +1,263 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. 
+""" + +import functools +import logging +import numpy as np +import pickle +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". +""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). 
+ + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + + Returns: + a dict with the same keys as input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/env.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/env.py new file mode 100644 index 0000000000000000000000000000000000000000..40634c17c73273ac8927632be164f466cfe7d1fa --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/env.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import importlib +import importlib.util +import logging +import numpy as np +import os +import random +import sys +from datetime import datetime +import torch + +__all__ = ["seed_all_rng"] + + +TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) +""" +PyTorch version as a tuple of 2 ints. Useful for comparison. +""" + + +DOC_BUILDING = os.getenv("_DOC_BUILDING", False) # set in docs/conf.py +""" +Whether we're building documentation. +""" + + +def seed_all_rng(seed=None): + """ + Set the random seed for the RNG in torch, numpy and python. + + Args: + seed (int): if None, will use a strong random seed. + """ + if seed is None: + seed = ( + os.getpid() + + int(datetime.now().strftime("%S%f")) + + int.from_bytes(os.urandom(2), "big") + ) + logger = logging.getLogger(__name__) + logger.info("Using a generated random seed {}".format(seed)) + np.random.seed(seed) + torch.manual_seed(seed) + random.seed(seed) + os.environ["PYTHONHASHSEED"] = str(seed) + + +# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path +def _import_file(module_name, file_path, make_importable=False): + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if make_importable: + sys.modules[module_name] = module + return module + + +def _configure_libraries(): + """ + Configurations for some libraries. + """ + # An environment option to disable `import cv2` globally, + # in case it leads to negative performance impact + disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) + if disable_cv2: + sys.modules["cv2"] = None + else: + # Disable opencl in opencv since its interaction with cuda often has negative effects + # This envvar is supported after OpenCV 3.4.0 + os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" + try: + import cv2 + + if int(cv2.__version__.split(".")[0]) >= 3: + cv2.ocl.setUseOpenCL(False) + except ModuleNotFoundError: + # Other types of ImportError, if happened, should not be ignored. 
+ # Because a failed opencv import could mess up address space + # https://github.com/skvark/opencv-python/issues/381 + pass + + def get_version(module, digit=2): + return tuple(map(int, module.__version__.split(".")[:digit])) + + # fmt: off + assert get_version(torch) >= (1, 4), "Requires torch>=1.4" + import fvcore + assert get_version(fvcore, 3) >= (0, 1, 2), "Requires fvcore>=0.1.2" + import yaml + assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" + # fmt: on + + +_ENV_SETUP_DONE = False + + +def setup_environment(): + """Perform environment setup work. The default setup is a no-op, but this + function allows the user to specify a Python source file or a module in + the $DETECTRON2_ENV_MODULE environment variable, that performs + custom setup work that may be necessary to their computing environment. + """ + global _ENV_SETUP_DONE + if _ENV_SETUP_DONE: + return + _ENV_SETUP_DONE = True + + _configure_libraries() + + custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") + + if custom_module_path: + setup_custom_environment(custom_module_path) + else: + # The default setup is a no-op + pass + + +def setup_custom_environment(custom_module): + """ + Load custom environment setup by importing a Python source file or a + module, and run the setup function. + """ + if custom_module.endswith(".py"): + module = _import_file("detectron2.utils.env.custom_module", custom_module) + else: + module = importlib.import_module(custom_module) + assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( + "Custom environment module defined in {} does not have the " + "required callable attribute 'setup_environment'." + ).format(custom_module) + module.setup_environment() + + +def fixup_module_metadata(module_name, namespace, keys=None): + """ + Fix the __qualname__ of module members to be their exported api name, so + when they are referenced in docs, sphinx can find them. Reference: + https://github.com/python-trio/trio/blob/6754c74eacfad9cc5c92d5c24727a2f3b620624e/trio/_util.py#L216-L241 + """ + if not DOC_BUILDING: + return + seen_ids = set() + + def fix_one(qualname, name, obj): + # avoid infinite recursion (relevant when using + # typing.Generic, for example) + if id(obj) in seen_ids: + return + seen_ids.add(id(obj)) + + mod = getattr(obj, "__module__", None) + if mod is not None and (mod.startswith(module_name) or mod.startswith("fvcore.")): + obj.__module__ = module_name + # Modules, unlike everything else in Python, put fully-qualitied + # names into their __name__ attribute. We check for "." to avoid + # rewriting these. + if hasattr(obj, "__name__") and "." not in obj.__name__: + obj.__name__ = name + obj.__qualname__ = qualname + if isinstance(obj, type): + for attr_name, attr_value in obj.__dict__.items(): + fix_one(objname + "." + attr_name, attr_name, attr_value) + + if keys is None: + keys = namespace.keys() + for objname in keys: + if not objname.startswith("_"): + obj = namespace[objname] + fix_one(objname, objname, obj) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/events.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/events.py new file mode 100644 index 0000000000000000000000000000000000000000..5dee954bdd6ad7dc5ea999562d1d2b03c3a520d9 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/events.py @@ -0,0 +1,486 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
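+#
+# Illustrative usage sketch (not part of upstream detectron2): a training loop
+# typically opens an EventStorage, logs scalars into it, and lets a writer such
+# as JSONWriter flush them. The path and scalar values below are placeholders.
+#
+#   writer = JSONWriter("./metrics.json")
+#   with EventStorage(start_iter=0) as storage:
+#       for _ in range(20):
+#           storage.put_scalar("loss", 0.5)   # dummy scalar for illustration
+#           writer.write()
+#           storage.step()
+#   writer.close()
+#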
+import datetime +import json +import logging +import os +import time +from collections import defaultdict +from contextlib import contextmanager +from typing import Optional +import torch +from fvcore.common.history_buffer import HistoryBuffer + +from detectron2.utils.file_io import PathManager + +__all__ = [ + "get_event_storage", + "JSONWriter", + "TensorboardXWriter", + "CommonMetricPrinter", + "EventStorage", +] + +_CURRENT_STORAGE_STACK = [] + + +def get_event_storage(): + """ + Returns: + The :class:`EventStorage` object that's currently being used. + Throws an error if no :class:`EventStorage` is currently enabled. + """ + assert len( + _CURRENT_STORAGE_STACK + ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" + return _CURRENT_STORAGE_STACK[-1] + + +class EventWriter: + """ + Base class for writers that obtain events from :class:`EventStorage` and process them. + """ + + def write(self): + raise NotImplementedError + + def close(self): + pass + + +class JSONWriter(EventWriter): + """ + Write scalars to a json file. + + It saves scalars as one json per line (instead of a big json) for easy parsing. + + Examples parsing such a json file: + :: + $ cat metrics.json | jq -s '.[0:2]' + [ + { + "data_time": 0.008433341979980469, + "iteration": 19, + "loss": 1.9228371381759644, + "loss_box_reg": 0.050025828182697296, + "loss_classifier": 0.5316952466964722, + "loss_mask": 0.7236229181289673, + "loss_rpn_box": 0.0856662318110466, + "loss_rpn_cls": 0.48198649287223816, + "lr": 0.007173333333333333, + "time": 0.25401854515075684 + }, + { + "data_time": 0.007216215133666992, + "iteration": 39, + "loss": 1.282649278640747, + "loss_box_reg": 0.06222952902317047, + "loss_classifier": 0.30682939291000366, + "loss_mask": 0.6970193982124329, + "loss_rpn_box": 0.038663312792778015, + "loss_rpn_cls": 0.1471673548221588, + "lr": 0.007706666666666667, + "time": 0.2490077018737793 + } + ] + + $ cat metrics.json | jq '.loss_mask' + 0.7126231789588928 + 0.689423680305481 + 0.6776131987571716 + ... + + """ + + def __init__(self, json_file, window_size=20): + """ + Args: + json_file (str): path to the json file. New data will be appended if the file exists. + window_size (int): the window size of median smoothing for the scalars whose + `smoothing_hint` are True. + """ + self._file_handle = PathManager.open(json_file, "a") + self._window_size = window_size + self._last_write = -1 + + def write(self): + storage = get_event_storage() + to_save = defaultdict(dict) + + for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): + # keep scalars that have not been written + if iter <= self._last_write: + continue + to_save[iter][k] = v + if len(to_save): + all_iters = sorted(to_save.keys()) + self._last_write = max(all_iters) + + for itr, scalars_per_iter in to_save.items(): + scalars_per_iter["iteration"] = itr + self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") + self._file_handle.flush() + try: + os.fsync(self._file_handle.fileno()) + except AttributeError: + pass + + def close(self): + self._file_handle.close() + + +class TensorboardXWriter(EventWriter): + """ + Write all scalars to a tensorboard file. 
+ """ + + def __init__(self, log_dir: str, window_size: int = 20, **kwargs): + """ + Args: + log_dir (str): the directory to save the output events + window_size (int): the scalars will be median-smoothed by this window size + + kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` + """ + self._window_size = window_size + from torch.utils.tensorboard import SummaryWriter + + self._writer = SummaryWriter(log_dir, **kwargs) + self._last_write = -1 + + def write(self): + storage = get_event_storage() + new_last_write = self._last_write + for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): + if iter > self._last_write: + self._writer.add_scalar(k, v, iter) + new_last_write = max(new_last_write, iter) + self._last_write = new_last_write + + # storage.put_{image,histogram} is only meant to be used by + # tensorboard writer. So we access its internal fields directly from here. + if len(storage._vis_data) >= 1: + for img_name, img, step_num in storage._vis_data: + self._writer.add_image(img_name, img, step_num) + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. + storage.clear_images() + + if len(storage._histograms) >= 1: + for params in storage._histograms: + self._writer.add_histogram_raw(**params) + storage.clear_histograms() + + def close(self): + if hasattr(self, "_writer"): # doesn't exist when the code fails at import + self._writer.close() + + +class CommonMetricPrinter(EventWriter): + """ + Print **common** metrics to the terminal, including + iteration time, ETA, memory, all losses, and the learning rate. + It also applies smoothing using a window of 20 elements. + + It's meant to print common metrics in common ways. + To print something in more customized ways, please implement a similar printer by yourself. + """ + + def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): + """ + Args: + max_iter: the maximum number of iterations to train. + Used to compute ETA. If not given, ETA will not be printed. + window_size (int): the losses will be median-smoothed by this window size + """ + self.logger = logging.getLogger(__name__) + self._max_iter = max_iter + self._window_size = window_size + self._last_write = None # (step, time) of last call to write(). 
Used to compute ETA + + def _get_eta(self, storage) -> Optional[str]: + if self._max_iter is None: + return "" + iteration = storage.iter + try: + eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration - 1) + storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) + return str(datetime.timedelta(seconds=int(eta_seconds))) + except KeyError: + # estimate eta on our own - more noisy + eta_string = None + if self._last_write is not None: + estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( + iteration - self._last_write[0] + ) + eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + self._last_write = (iteration, time.perf_counter()) + return eta_string + + def write(self): + storage = get_event_storage() + iteration = storage.iter + if iteration == self._max_iter: + # This hook only reports training progress (loss, ETA, etc) but not other data, + # therefore do not write anything after training succeeds, even if this method + # is called. + return + + try: + data_time = storage.history("data_time").avg(20) + except KeyError: + # they may not exist in the first few iterations (due to warmup) + # or when SimpleTrainer is not used + data_time = None + try: + iter_time = storage.history("time").global_avg() + except KeyError: + iter_time = None + try: + lr = "{:.5g}".format(storage.history("lr").latest()) + except KeyError: + lr = "N/A" + + eta_string = self._get_eta(storage) + + if torch.cuda.is_available(): + max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + else: + max_mem_mb = None + + # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" + self.logger.info( + " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( + eta=f"eta: {eta_string} " if eta_string else "", + iter=iteration, + losses=" ".join( + [ + "{}: {:.4g}".format(k, v.median(self._window_size)) + for k, v in storage.histories().items() + if "loss" in k + ] + ), + time="time: {:.4f} ".format(iter_time) if iter_time is not None else "", + data_time="data_time: {:.4f} ".format(data_time) if data_time is not None else "", + lr=lr, + memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", + ) + ) + + +class EventStorage: + """ + The user-facing class that provides metric storage functionalities. + + In the future we may add support for storing / logging other types of data if needed. + """ + + def __init__(self, start_iter=0): + """ + Args: + start_iter (int): the iteration number to start with + """ + self._history = defaultdict(HistoryBuffer) + self._smoothing_hints = {} + self._latest_scalars = {} + self._iter = start_iter + self._current_prefix = "" + self._vis_data = [] + self._histograms = [] + + def put_image(self, img_name, img_tensor): + """ + Add an `img_tensor` associated with `img_name`, to be shown on + tensorboard. + + Args: + img_name (str): The name of the image to put into tensorboard. + img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` + Tensor of shape `[channel, height, width]` where `channel` is + 3. The image format should be RGB. The elements in img_tensor + can either have values in [0, 1] (float32) or [0, 255] (uint8). + The `img_tensor` will be visualized in tensorboard. + """ + self._vis_data.append((img_name, img_tensor, self._iter)) + + def put_scalar(self, name, value, smoothing_hint=True): + """ + Add a scalar `value` to the `HistoryBuffer` associated with `name`. 
+ + Args: + smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be + smoothed when logged. The hint will be accessible through + :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint + and apply custom smoothing rule. + + It defaults to True because most scalars we save need to be smoothed to + provide any useful signal. + """ + name = self._current_prefix + name + history = self._history[name] + value = float(value) + history.update(value, self._iter) + self._latest_scalars[name] = (value, self._iter) + + existing_hint = self._smoothing_hints.get(name) + if existing_hint is not None: + assert ( + existing_hint == smoothing_hint + ), "Scalar {} was put with a different smoothing_hint!".format(name) + else: + self._smoothing_hints[name] = smoothing_hint + + def put_scalars(self, *, smoothing_hint=True, **kwargs): + """ + Put multiple scalars from keyword arguments. + + Examples: + + storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) + """ + for k, v in kwargs.items(): + self.put_scalar(k, v, smoothing_hint=smoothing_hint) + + def put_histogram(self, hist_name, hist_tensor, bins=1000): + """ + Create a histogram from a tensor. + + Args: + hist_name (str): The name of the histogram to put into tensorboard. + hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted + into a histogram. + bins (int): Number of histogram bins. + """ + ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() + + # Create a histogram with PyTorch + hist_counts = torch.histc(hist_tensor, bins=bins) + hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) + + # Parameter for the add_histogram_raw function of SummaryWriter + hist_params = dict( + tag=hist_name, + min=ht_min, + max=ht_max, + num=len(hist_tensor), + sum=float(hist_tensor.sum()), + sum_squares=float(torch.sum(hist_tensor ** 2)), + bucket_limits=hist_edges[1:].tolist(), + bucket_counts=hist_counts.tolist(), + global_step=self._iter, + ) + self._histograms.append(hist_params) + + def history(self, name): + """ + Returns: + HistoryBuffer: the scalar history for name + """ + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + return ret + + def histories(self): + """ + Returns: + dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars + """ + return self._history + + def latest(self): + """ + Returns: + dict[str -> (float, int)]: mapping from the name of each scalar to the most + recent value and the iteration number its added. + """ + return self._latest_scalars + + def latest_with_smoothing_hint(self, window_size=20): + """ + Similar to :meth:`latest`, but the returned values + are either the un-smoothed original latest value, + or a median of the given window_size, + depend on whether the smoothing_hint is True. + + This provides a default behavior that other writers can use. + """ + result = {} + for k, (v, itr) in self._latest_scalars.items(): + result[k] = ( + self._history[k].median(window_size) if self._smoothing_hints[k] else v, + itr, + ) + return result + + def smoothing_hints(self): + """ + Returns: + dict[name -> bool]: the user-provided hint on whether the scalar + is noisy and needs smoothing. + """ + return self._smoothing_hints + + def step(self): + """ + User should either: (1) Call this function to increment storage.iter when needed. Or + (2) Set `storage.iter` to the correct iteration number before each iteration. 
+ + The storage will then be able to associate the new data with an iteration number. + """ + self._iter += 1 + + @property + def iter(self): + """ + Returns: + int: The current iteration number. When used together with a trainer, + this is ensured to be the same as trainer.iter. + """ + return self._iter + + @iter.setter + def iter(self, val): + self._iter = int(val) + + @property + def iteration(self): + # for backward compatibility + return self._iter + + def __enter__(self): + _CURRENT_STORAGE_STACK.append(self) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + assert _CURRENT_STORAGE_STACK[-1] == self + _CURRENT_STORAGE_STACK.pop() + + @contextmanager + def name_scope(self, name): + """ + Yields: + A context within which all the events added to this storage + will be prefixed by the name scope. + """ + old_prefix = self._current_prefix + self._current_prefix = name.rstrip("/") + "/" + yield + self._current_prefix = old_prefix + + def clear_images(self): + """ + Delete all the stored images for visualization. This should be called + after images are written to tensorboard. + """ + self._vis_data = [] + + def clear_histograms(self): + """ + Delete all the stored histograms for visualization. + This should be called after histograms are written to tensorboard. + """ + self._histograms = [] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/file_io.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/file_io.py new file mode 100644 index 0000000000000000000000000000000000000000..46ee4ec31d04eee77976ff3edbbf84762a3409ed --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/file_io.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from iopath.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler +from iopath.common.file_io import PathManager as PathManagerBase + +__all__ = ["PathManager", "PathHandler"] + + +PathManager = PathManagerBase() +""" +This is a detectron2 project-specific PathManager. +We try to stay away from global PathManager in fvcore as it +introduces potential conflicts among other libraries. +""" + + +class Detectron2Handler(PathHandler): + """ + Resolve anything that's hosted under detectron2's namespace. + """ + + PREFIX = "detectron2://" + S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path, **kwargs): + name = path[len(self.PREFIX) :] + return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name, **kwargs) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +PathManager.register_handler(HTTPURLHandler()) +PathManager.register_handler(OneDrivePathHandler()) +PathManager.register_handler(Detectron2Handler()) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/logger.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..7c7890f8bec5db44098fe1a38d26eb13231f7063 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/logger.py @@ -0,0 +1,237 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
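+#
+# Illustrative usage sketch (not part of upstream detectron2), assuming an
+# "outputs" directory is writable:
+#
+#   import logging
+#
+#   logger = setup_logger(output="outputs", name="detectron2")   # logs to outputs/log.txt
+#   logger.info("training started")
+#   log_first_n(logging.WARNING, "shown at most 3 times", n=3)
+#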
+import atexit +import functools +import logging +import os +import sys +import time +from collections import Counter +import torch +from tabulate import tabulate +from termcolor import colored + +from detectron2.utils.file_io import PathManager + +__all__ = ["setup_logger", "log_first_n", "log_every_n", "log_every_n_seconds"] + + +class _ColorfulFormatter(logging.Formatter): + def __init__(self, *args, **kwargs): + self._root_name = kwargs.pop("root_name") + "." + self._abbrev_name = kwargs.pop("abbrev_name", "") + if len(self._abbrev_name): + self._abbrev_name = self._abbrev_name + "." + super(_ColorfulFormatter, self).__init__(*args, **kwargs) + + def formatMessage(self, record): + record.name = record.name.replace(self._root_name, self._abbrev_name) + log = super(_ColorfulFormatter, self).formatMessage(record) + if record.levelno == logging.WARNING: + prefix = colored("WARNING", "red", attrs=["blink"]) + elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: + prefix = colored("ERROR", "red", attrs=["blink", "underline"]) + else: + return log + return prefix + " " + log + + +@functools.lru_cache() # so that calling setup_logger multiple times won't add many handlers +def setup_logger( + output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None +): + """ + Initialize the detectron2 logger and set its verbosity level to "DEBUG". + + Args: + output (str): a file name or a directory to save log. If None, will not save log file. + If ends with ".txt" or ".log", assumed to be a file name. + Otherwise, logs will be saved to `output/log.txt`. + name (str): the root module name of this logger + abbrev_name (str): an abbreviation of the module, to avoid long names in logs. + Set to "" to not log the root module in logs. + By default, will abbreviate "detectron2" to "d2" and leave other + modules unchanged. + + Returns: + logging.Logger: a logger + """ + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate = False + + if abbrev_name is None: + abbrev_name = "d2" if name == "detectron2" else name + + plain_formatter = logging.Formatter( + "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S" + ) + # stdout logging: master only + if distributed_rank == 0: + ch = logging.StreamHandler(stream=sys.stdout) + ch.setLevel(logging.DEBUG) + if color: + formatter = _ColorfulFormatter( + colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", + datefmt="%m/%d %H:%M:%S", + root_name=name, + abbrev_name=str(abbrev_name), + ) + else: + formatter = plain_formatter + ch.setFormatter(formatter) + logger.addHandler(ch) + + # file logging: all workers + if output is not None: + if output.endswith(".txt") or output.endswith(".log"): + filename = output + else: + filename = os.path.join(output, "log.txt") + if distributed_rank > 0: + filename = filename + ".rank{}".format(distributed_rank) + PathManager.mkdirs(os.path.dirname(filename)) + + fh = logging.StreamHandler(_cached_log_stream(filename)) + fh.setLevel(logging.DEBUG) + fh.setFormatter(plain_formatter) + logger.addHandler(fh) + + return logger + + +# cache the opened file object, so that different calls to `setup_logger` +# with the same file name can safely write to the same file. 
+@functools.lru_cache(maxsize=None) +def _cached_log_stream(filename): + # use 1K buffer if writing to cloud storage + io = PathManager.open(filename, "a", buffering=1024 if "://" in filename else -1) + atexit.register(io.close) + return io + + +""" +Below are some other convenient logging methods. +They are mainly adopted from +https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py +""" + + +def _find_caller(): + """ + Returns: + str: module name of the caller + tuple: a hashable key to be used to identify different callers + """ + frame = sys._getframe(2) + while frame: + code = frame.f_code + if os.path.join("utils", "logger.") not in code.co_filename: + mod_name = frame.f_globals["__name__"] + if mod_name == "__main__": + mod_name = "detectron2" + return mod_name, (code.co_filename, frame.f_lineno, code.co_name) + frame = frame.f_back + + +_LOG_COUNTER = Counter() +_LOG_TIMER = {} + + +def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): + """ + Log only for the first n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + key (str or tuple[str]): the string(s) can be one of "caller" or + "message", which defines how to identify duplicated logs. + For example, if called with `n=1, key="caller"`, this function + will only log the first call from the same caller, regardless of + the message content. + If called with `n=1, key="message"`, this function will log the + same content only once, even if they are called from different places. + If called with `n=1, key=("caller", "message")`, this function + will not log only if the same caller has logged the same message before. + """ + if isinstance(key, str): + key = (key,) + assert len(key) > 0 + + caller_module, caller_key = _find_caller() + hash_key = () + if "caller" in key: + hash_key = hash_key + caller_key + if "message" in key: + hash_key = hash_key + (msg,) + + _LOG_COUNTER[hash_key] += 1 + if _LOG_COUNTER[hash_key] <= n: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n(lvl, msg, n=1, *, name=None): + """ + Log once per n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + _LOG_COUNTER[key] += 1 + if n == 1 or _LOG_COUNTER[key] % n == 1: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n_seconds(lvl, msg, n=1, *, name=None): + """ + Log no more than once per n seconds. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + last_logged = _LOG_TIMER.get(key, None) + current_time = time.time() + if last_logged is None or current_time - last_logged >= n: + logging.getLogger(name or caller_module).log(lvl, msg) + _LOG_TIMER[key] = current_time + + +def create_small_table(small_dict): + """ + Create a small table using the keys of small_dict as headers. This is only + suitable for small dictionaries. + + Args: + small_dict (dict): a result dictionary of only a few items. + + Returns: + str: the table as a string. 
+ """ + keys, values = tuple(zip(*small_dict.items())) + table = tabulate( + [values], + headers=keys, + tablefmt="pipe", + floatfmt=".3f", + stralign="center", + numalign="center", + ) + return table + + +def _log_api_usage(identifier: str): + """ + Internal function used to log the usage of different detectron2 components + inside facebook's infra. + """ + torch._C._log_api_usage_once("detectron2." + identifier) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/memory.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..bd494780b9dbbd1571688cd270bb9b53d113c13e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/memory.py @@ -0,0 +1,84 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +from contextlib import contextmanager +from functools import wraps +import torch + +__all__ = ["retry_if_cuda_oom"] + + +@contextmanager +def _ignore_torch_cuda_oom(): + """ + A context which ignores CUDA OOM exception from pytorch. + """ + try: + yield + except RuntimeError as e: + # NOTE: the string may change? + if "CUDA out of memory. " in str(e): + pass + else: + raise + + +def retry_if_cuda_oom(func): + """ + Makes a function retry itself after encountering + pytorch's CUDA OOM error. + It will first retry after calling `torch.cuda.empty_cache()`. + + If that still fails, it will then retry by trying to convert inputs to CPUs. + In this case, it expects the function to dispatch to CPU implementation. + The return values may become CPU tensors as well and it's user's + responsibility to convert it back to CUDA tensor if needed. + + Args: + func: a stateless callable that takes tensor-like objects as arguments + + Returns: + a callable which retries `func` if OOM is encountered. + + Examples: + :: + output = retry_if_cuda_oom(some_torch_function)(input1, input2) + # output may be on CPU even if inputs are on GPU + + Note: + 1. When converting inputs to CPU, it will only look at each argument and check + if it has `.device` and `.to` for conversion. Nested structures of tensors + are not supported. + + 2. Since the function might be called more than once, it has to be + stateless. + """ + + def maybe_to_cpu(x): + try: + like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") + except AttributeError: + like_gpu_tensor = False + if like_gpu_tensor: + return x.to(device="cpu") + else: + return x + + @wraps(func) + def wrapped(*args, **kwargs): + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Clear cache and retry + torch.cuda.empty_cache() + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Try on CPU. This slows down the code significantly, therefore print a notice. + logger = logging.getLogger(__name__) + logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) + new_args = (maybe_to_cpu(x) for x in args) + new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} + return func(*new_args, **new_kwargs) + + return wrapped diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/registry.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..ea0434d9740ba781072d53967d25d2c69b91b62e --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/registry.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
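+#
+# Illustrative usage sketch (not part of upstream detectron2): `Registry`
+# (re-exported from fvcore) maps config strings to callables, and `locate`
+# resolves a dotted path back to an object. `DEMO_REGISTRY` is a hypothetical
+# example name.
+#
+#   DEMO_REGISTRY = Registry("DEMO")
+#
+#   @DEMO_REGISTRY.register()
+#   def build_demo():
+#       return "demo"
+#
+#   DEMO_REGISTRY.get("build_demo")()        # -> "demo"
+#   locate("collections.OrderedDict")        # -> the OrderedDict class
+#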
+ +from typing import Any +import pydoc +from fvcore.common.registry import Registry # for backward compatibility. + +""" +``Registry`` and `locate` provide ways to map a string (typically found +in config files) to callable objects. +""" + +__all__ = ["Registry", "locate"] + + +def _convert_target_to_string(t: Any) -> Any: + """ + Inverse of ``locate()``. + """ + return f"{t.__module__}.{t.__qualname__}" + + +def locate(name: str) -> Any: + """ + Locate and return an object ``x`` using an input string ``{x.__module__}.{x.__qualname__}``, + such as "module.submodule.class_name". + + Raise Exception if it cannot be found. + """ + obj = pydoc.locate(name) + + # Some cases (e.g. torch.optim.sgd.SGD) not handled correctly + # by pydoc.locate. Try a private function from hydra. + # Should use _locate directly if it's public. + if obj is None: + try: + from hydra.utils import get_method + except ImportError as e: + raise ImportError(f"Cannot dynamically locate object {name}!") from e + else: + obj = get_method(name) # it raises if fails + + return obj diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/serialize.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/serialize.py new file mode 100644 index 0000000000000000000000000000000000000000..96bb153ec82117d062ad4849237d41d9877e7f9c --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/serialize.py @@ -0,0 +1,29 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import cloudpickle + + +class PicklableWrapper(object): + """ + Wrap an object to make it more picklable, note that it uses + heavy weight serialization libraries that are slower than pickle. + It's best to use it only on closures (which are usually not picklable). + + This is a simplified version of + https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py + """ + + def __init__(self, obj): + self._obj = obj + + def __reduce__(self): + s = cloudpickle.dumps(self._obj) + return cloudpickle.loads, (s,) + + def __call__(self, *args, **kwargs): + return self._obj(*args, **kwargs) + + def __getattr__(self, attr): + # Ensure that the wrapped object can be used seamlessly as the previous object. + if attr not in ["_obj"]: + return getattr(self._obj, attr) + return getattr(self, attr) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/testing.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..23c70b98d9d0d6c18d93a2488661c49c71a0c5fd --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/testing.py @@ -0,0 +1,132 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import io +import numpy as np +import torch + +from detectron2 import model_zoo +from detectron2.data import DatasetCatalog +from detectron2.data.detection_utils import read_image +from detectron2.modeling import build_model +from detectron2.structures import Boxes, Instances +from detectron2.utils.file_io import PathManager + + +""" +Internal utilities for tests. Don't use except for writing tests. +""" + + +def get_model_no_weights(config_path): + """ + Like model_zoo.get, but do not load any weights (even pretrained) + """ + cfg = model_zoo.get_config(config_path) + if not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + return build_model(cfg) + + +def random_boxes(num_boxes, max_coord=100, device="cpu"): + """ + Create a random Nx4 boxes tensor, with coordinates < max_coord. 
+ """ + boxes = torch.rand(num_boxes, 4, device=device) * (max_coord * 0.5) + boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression + # Note: the implementation of this function in torchvision is: + # boxes[:, 2:] += torch.rand(N, 2) * 100 + # but it does not guarantee non-negative widths/heights constraints: + # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: + boxes[:, 2:] += boxes[:, :2] + return boxes + + +def get_sample_coco_image(tensor=True): + """ + Args: + tensor (bool): if True, returns 3xHxW tensor. + else, returns a HxWx3 numpy array. + + Returns: + an image, in BGR color. + """ + try: + file_name = DatasetCatalog.get("coco_2017_val_100")[0]["file_name"] + if not PathManager.exists(file_name): + raise FileNotFoundError() + except IOError: + # for public CI to run + file_name = "http://images.cocodataset.org/train2017/000000000009.jpg" + ret = read_image(file_name, format="BGR") + if tensor: + ret = torch.from_numpy(np.ascontiguousarray(ret.transpose(2, 0, 1))) + return ret + + +def convert_scripted_instances(instances): + """ + Convert a scripted Instances object to a regular :class:`Instances` object + """ + ret = Instances(instances.image_size) + for name in instances._field_names: + val = getattr(instances, "_" + name, None) + if val is not None: + ret.set(name, val) + return ret + + +def assert_instances_allclose(input, other, *, rtol=1e-5, msg="", size_as_tensor=False): + """ + Args: + input, other (Instances): + size_as_tensor: compare image_size of the Instances as tensors (instead of tuples). + Useful for comparing outputs of tracing. + """ + if not isinstance(input, Instances): + input = convert_scripted_instances(input) + if not isinstance(other, Instances): + other = convert_scripted_instances(other) + + if not msg: + msg = "Two Instances are different! " + else: + msg = msg.rstrip() + " " + + size_error_msg = msg + f"image_size is {input.image_size} vs. {other.image_size}!" + if size_as_tensor: + assert torch.equal( + torch.tensor(input.image_size), torch.tensor(other.image_size) + ), size_error_msg + else: + assert input.image_size == other.image_size, size_error_msg + fields = sorted(input.get_fields().keys()) + fields_other = sorted(other.get_fields().keys()) + assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!" + + for f in fields: + val1, val2 = input.get(f), other.get(f) + if isinstance(val1, Boxes): + # boxes in the range of O(100) and can have a larger tolerance + assert torch.allclose(val1.tensor, val2.tensor, atol=100 * rtol), ( + msg + f"Field {f} differs too much!" + ) + elif isinstance(val1, torch.Tensor): + if val1.dtype.is_floating_point: + mag = torch.abs(val1).max().cpu().item() + assert torch.allclose(val1, val2, atol=mag * rtol), ( + msg + f"Field {f} differs too much!" + ) + else: + assert torch.equal(val1, val2), msg + f"Field {f} is different!" + else: + raise ValueError(f"Don't know how to compare type {type(val1)}") + + +def reload_script_model(module): + """ + Save a jit module and load it back. 
+ Similar to the `getExportImportCopy` function in torch/testing/ + """ + buffer = io.BytesIO() + torch.jit.save(module, buffer) + buffer.seek(0) + return torch.jit.load(buffer) diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/video_visualizer.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/video_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..904ab1f6b7d8abb243ba05b300dd6d9c5e23ab14 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/video_visualizer.py @@ -0,0 +1,236 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +import pycocotools.mask as mask_util + +from detectron2.utils.visualizer import ( + ColorMode, + Visualizer, + _create_text_labels, + _PanopticPrediction, +) + +from .colormap import random_color + + +class _DetectedInstance: + """ + Used to store data about detected objects in video frame, + in order to transfer color to objects in the future frames. + + Attributes: + label (int): + bbox (tuple[float]): + mask_rle (dict): + color (tuple[float]): RGB colors in range (0, 1) + ttl (int): time-to-live for the instance. For example, if ttl=2, + the instance color can be transferred to objects in the next two frames. + """ + + __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] + + def __init__(self, label, bbox, mask_rle, color, ttl): + self.label = label + self.bbox = bbox + self.mask_rle = mask_rle + self.color = color + self.ttl = ttl + + +class VideoVisualizer: + def __init__(self, metadata, instance_mode=ColorMode.IMAGE): + """ + Args: + metadata (MetadataCatalog): image metadata. + """ + self.metadata = metadata + self._old_instances = [] + assert instance_mode in [ + ColorMode.IMAGE, + ColorMode.IMAGE_BW, + ], "Other mode not supported yet." + self._instance_mode = instance_mode + + def draw_instance_predictions(self, frame, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. 
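A per-frame sketch for the `VideoVisualizer` defined below, which keeps instance colors stable across frames. It assumes `predictor` is any callable (for example detectron2's `DefaultPredictor`) that returns `{"instances": Instances}` for a BGR frame; the dataset name and video path are placeholders.

```python
import cv2
from detectron2.data import MetadataCatalog
from detectron2.utils.video_visualizer import VideoVisualizer


def visualize_video(video_path, predictor, dataset_name="coco_2017_val"):
    """Yield BGR frames with instance predictions drawn, colors kept consistent."""
    video_vis = VideoVisualizer(MetadataCatalog.get(dataset_name))
    cap = cv2.VideoCapture(video_path)
    while True:
        ok, frame_bgr = cap.read()
        if not ok:
            break
        frame_rgb = frame_bgr[:, :, ::-1]
        instances = predictor(frame_bgr)["instances"].to("cpu")
        vis = video_vis.draw_instance_predictions(frame_rgb, instances)
        yield vis.get_image()[:, :, ::-1]  # back to BGR for cv2.VideoWriter / imwrite
    cap.release()
```

Each yielded frame can be written straight to a `cv2.VideoWriter`.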
+ """ + frame_visualizer = Visualizer(frame, self.metadata) + num_instances = len(predictions) + if num_instances == 0: + return frame_visualizer.output + + boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + if predictions.has("pred_masks"): + masks = predictions.pred_masks + # mask IOU is not yet enabled + # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) + # assert len(masks_rles) == num_instances + else: + masks = None + + detected = [ + _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + + if self._instance_mode == ColorMode.IMAGE_BW: + # any() returns uint8 tensor + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + (masks.any(dim=0) > 0).numpy() if masks is not None else None + ) + alpha = 0.3 + else: + alpha = 0.5 + + frame_visualizer.overlay_instances( + # boxes=None if masks is not None else boxes, # boxes are a bit distracting + boxes=boxes, + masks=masks, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + ) + + return frame_visualizer.output + + def draw_sem_seg(self, frame, sem_seg, area_threshold=None): + """ + Args: + sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), + each value is the integer label. + area_threshold (Optional[int]): only draw segmentations larger than the threshold + """ + # don't need to do anything special + frame_visualizer = Visualizer(frame, self.metadata) + frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) + return frame_visualizer.output + + def draw_panoptic_seg_predictions( + self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 + ): + frame_visualizer = Visualizer(frame, self.metadata) + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + pred.non_empty_mask() + ) + + # draw mask for all semantic segments first i.e. 
"stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + frame_visualizer.draw_binary_mask( + mask, + color=mask_color, + text=self.metadata.stuff_classes[category_idx], + alpha=alpha, + area_threshold=area_threshold, + ) + + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return frame_visualizer.output + # draw mask for all instances second + masks, sinfo = list(zip(*all_instances)) + num_instances = len(masks) + masks_rles = mask_util.encode( + np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") + ) + assert len(masks_rles) == num_instances + + category_ids = [x["category_id"] for x in sinfo] + detected = [ + _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + labels = [self.metadata.thing_classes[k] for k in category_ids] + + frame_visualizer.overlay_instances( + boxes=None, + masks=masks, + labels=labels, + keypoints=None, + assigned_colors=colors, + alpha=alpha, + ) + return frame_visualizer.output + + def _assign_colors(self, instances): + """ + Naive tracking heuristics to assign same color to the same instance, + will update the internal state of tracked instances. + + Returns: + list[tuple[float]]: list of colors. + """ + + # Compute iou with either boxes or masks: + is_crowd = np.zeros((len(instances),), dtype=np.bool) + if instances[0].bbox is None: + assert instances[0].mask_rle is not None + # use mask iou only when box iou is None + # because box seems good enough + rles_old = [x.mask_rle for x in self._old_instances] + rles_new = [x.mask_rle for x in instances] + ious = mask_util.iou(rles_old, rles_new, is_crowd) + threshold = 0.5 + else: + boxes_old = [x.bbox for x in self._old_instances] + boxes_new = [x.bbox for x in instances] + ious = mask_util.iou(boxes_old, boxes_new, is_crowd) + threshold = 0.6 + if len(ious) == 0: + ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") + + # Only allow matching instances of the same label: + for old_idx, old in enumerate(self._old_instances): + for new_idx, new in enumerate(instances): + if old.label != new.label: + ious[old_idx, new_idx] = 0 + + matched_new_per_old = np.asarray(ious).argmax(axis=1) + max_iou_per_old = np.asarray(ious).max(axis=1) + + # Try to find match for each old instance: + extra_instances = [] + for idx, inst in enumerate(self._old_instances): + if max_iou_per_old[idx] > threshold: + newidx = matched_new_per_old[idx] + if instances[newidx].color is None: + instances[newidx].color = inst.color + continue + # If an old instance does not match any new instances, + # keep it for the next frame in case it is just missed by the detector + inst.ttl -= 1 + if inst.ttl > 0: + extra_instances.append(inst) + + # Assign random color to newly-detected instances: + for inst in instances: + if inst.color is None: + inst.color = random_color(rgb=True, maximum=1) + self._old_instances = instances[:] + extra_instances + return [d.color for d in instances] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..add1fec6ab7889bbdd1f8b9056df37efc0d8a5aa --- /dev/null +++ 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer.py @@ -0,0 +1,1398 @@ +# Edit by Yao Lu +# +# Copyright (c) Facebook, Inc. and its affiliates. +import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .colormap import random_color +from shapely.geometry import * + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +def py_cpu_pnms(dets, scores, thresh): + pts = dets + # for i in xrange(dets.shape[0]): + # pts.append([[int(bbox[i, 0]) + info_bbox[i, j], int(bbox[i, 1]) + info_bbox[i, j+1]] for j in xrange(0,28,2)]) + scores = np.array(scores) + order = scores.argsort()[::-1] + areas = np.zeros(scores.shape) + order = scores.argsort()[::-1] + inter_areas = np.zeros((scores.shape[0], scores.shape[0])) + for il in range(len(pts)): + poly = Polygon(pts[il]).buffer(0.001) + areas[il] = poly.area + for jl in range(il, len(pts)): + polyj = Polygon(pts[jl].tolist()).buffer(0.001) + inS = poly.intersection(polyj) + try: + inter_areas[il][jl] = inS.area + except: + import pdb;pdb.set_trace() + inter_areas[jl][il] = inS.area + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
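The `py_cpu_pnms` function above is a polygon NMS built on Shapely intersections: polygons are kept in descending score order and any polygon whose IoU with a kept one exceeds the threshold is suppressed. A toy sketch with three axis-aligned squares, assuming this `sts` build of `detectron2` is the one importable:

```python
import numpy as np
from detectron2.utils.visualizer import py_cpu_pnms

polys = [
    np.array([[0, 0], [10, 0], [10, 10], [0, 10]]),      # score 0.9
    np.array([[1, 1], [11, 1], [11, 11], [1, 11]]),      # score 0.8, IoU ~0.68 with the first
    np.array([[50, 50], [60, 50], [60, 60], [50, 60]]),  # score 0.7, disjoint
]
scores = [0.9, 0.8, 0.7]

keep = py_cpu_pnms(polys, scores, thresh=0.5)
# Indices 0 and 2 survive; the lower-scored overlapping square is suppressed.
```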
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
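A short sketch of the `GenericMask` container shown above, converting a binary mask into polygons and back; the mask shape and rectangle are arbitrary example values.

```python
import numpy as np
from detectron2.utils.visualizer import GenericMask

mask = np.zeros((64, 64), dtype=np.uint8)
mask[10:30, 10:40] = 1                      # one filled rectangle

gm = GenericMask(mask, height=64, width=64)
print(len(gm.polygons), gm.has_holes)       # 1 False: a single external contour, no holes
print(gm.area(), gm.bbox())                 # 600 foreground pixels and its XYXY bounding box
```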
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None and class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + # labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + #luyao + labels = ["{}.{:.0f}".format(l, s * 100) for l, s in zip(labels, scores)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3). + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
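Note that this copy of `_create_text_labels` changes the upstream `"{} {:.0f}%"` label format to `"{}.{:.0f}"`. A tiny sketch with made-up class names:

```python
from detectron2.utils.visualizer import _create_text_labels

labels = _create_text_labels(
    classes=[0, 1], scores=[0.98, 0.75], class_names=["text", "signboard"]
)
print(labels)   # ['text.98', 'signboard.75']  (upstream detectron2 would print 'text 98%')
```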
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + # Need to imshow this first so that other patches can be drawn on top + ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + self.fig = fig + self.ax = ax + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + # self.fig.savefig(filepath[:-4]+'.svg', format='svg') + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, implement custom drawing functions with the primitive + methods instead. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): image metadata. + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
+ """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + + def draw_instance_predictions(self, predictions, path): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + #luyao# + # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + rec = predictions.pred_rec if predictions.has("pred_rec") else None + rec_score = predictions.pred_rec_score if predictions.has("pred_rec_score") else None + #luyao# + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + # masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + #luyao# + alpha = 0.8 + else: + colors = None + alpha = 0.77 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + alpha = 0.3 + + self.overlay_instances( + rec=rec, + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + scores=scores, + path=path, + rec_score = rec_score + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg_predictions( + self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 + ): + """ + Draw panoptic prediction results on an image. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image(pred.non_empty_mask()) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + labels = [x["category_id"] for x in annos] + colors = None + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels + ] + names = self.metadata.get("thing_classes", None) + if names: + labels = [names[i] for i in labels] + labels = [ + "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") + for i, a in zip(labels, annos) + ] + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + assert "segments_info" in dic + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + segments_info = dic["segments_info"] + if pan_seg is not None: + pan_seg = torch.Tensor(pan_seg) + self.draw_panoptic_seg_predictions(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + rec=None, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + scores, + path, + rec_score, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + rec = rec + def _decode_recognition(rec): + # CTLABELS = "_0123456789abcdefghijklmnopqrstuvwxyz" + CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + # ctc decoding + last_char = False + s = '' + for c in rec: + c = int(c) + if 0','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + for c in rec: + c = int(c) + if c < 96: + if last_char != c: + if CTLABELS[c-1] in "_0123456789abcdefghijklmnopqrstuvwxyz": + s += CTLABELS[c-1] + last_char = c + else: + last_char = False + return s + + num_instances = None + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. 
+ boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + rec = [rec[idx] for idx in sorted_idxs] if rec is not None else None + # rec_score = [rec_score[idx] for idx in sorted_idxs] if rec is not None else None + scores = [scores[idx] for idx in sorted_idxs] if scores is not None else None + # assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + #luyao# + assigned_colors = [[0,113.985,118.955],[216.75,82.875,24.99],[236.895, 176.97, 31.875],[125.97, 46.92, 141.78],[118.83, 171.87, 47.94],[76.755, 189.975, 237.915],[161.925, 19.89, 46.92],\ + [255,140,0 ],[70,130,180 ],[128,128,0 ],[205,92,92 ],[128,0,128 ],[255,182,193],[255,255,0],[105,105,105],[0,255,255],[0,255,0 ],\ + [210,180,140],[255,0,0 ],[0,139,139],[255,0,255],[127,255,0],[75,0,130],[32,178,170],[255,215,0],[219,112,147],[148,0,211 ],\ + [100,149,237],[175,238,238 ],[143,188,143],[255,255,224 ],[244,164,96],[188,143,143],[192,192,192 ],[220,20,60],[218,112,214],[147,112,219]] + rec = [_decode_recognition(rrec) for rrec in rec] + # assigned_colors = [[1,140/255,0],[30/255,144/255,1],[148/255,0,211/255],[0,1,1],[1,0,0],\ + # [30/255,143/255,1],[0.94,0.5,0.5],[1,1,0],[0.5,0.5,0],[0.823,0.412,0.117],[0.58,0,0.827],[0.5,0,0]\ + # ,[0.82,0.41,0.12],[0.41,0.41,0.41],[0,0.54,0.54],[0.75,0.25,0.65],[0.2,0.6,0.8],[0.74,0,0.3],[0,1.0,0.4],[1,0.5,0.5],[0.5,0.5,1]\ + # ,[0.6,0,1],[0.56,0.56,0.3],[0,1,0],[1.0,0.0,0.4],[0.0,1.0,0.4],[0.0,0.5,1.0],[1,215/255,0]] + poly = [] + for i in range(num_instances): + bb = 1 + if masks is not None: + for segment in masks[i].polygons: + if bb == 1: + poly.append(masks[i].polygons[0].astype(int).reshape(-1,2)) + bb = 0 + keep = py_cpu_pnms(poly,scores,0.5) + alpha = 0.7 + for i in range(num_instances): + if rec[i] == ' ': + continue + if i not in keep: + continue + # color = assigned_colors[i] + # print(i) + color_ = assigned_colors[i%len(assigned_colors)] + color = [x/255 for x in color_] + # if boxes is not None: + # self.draw_box(boxes[i], edge_color=color) + #luyao + # alpha = 0.6 + H, W, _ = self.img.shape + if masks is not None: + for segment in masks[i].polygons: + # segment = polygon2rbox(segment, H, W) + # segment = np.array(segment) + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + if labels is not None: + # first get a box + if boxes is not None: + #luyao# + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + elif masks is not None: + # skip small mask without polygon + if len(masks[i].polygons) == 0: + continue + + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. 
+ # for small objects, draw text at the side to avoid occlusion + + instance_area = (y1 - y0) * (x1 - x0) + # print(x0,' ',x1,' ',y0,' ',y1,' ',self.output.height,' ', self.output.width) + #luyao# + if y0<5: + text_pos = ((x0+x1)//2,(y0+y1)//2) + #luyao# + # if ( + # instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + # or y1 - y0 < 40 * self.output.scale + # ): + # if y1 >= self.output.height - 5: + # text_pos = (x1, y0) + # else: + # text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 1.0 + * self._default_font_size + ) + self.draw_text( + # labels[i], + # '', + rec[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. 
+ """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + # draw keypoint + x, y, prob = keypoint + if prob > _KEYPOINT_THRESHOLD: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0 + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. + """ + if not font_size: + font_size = self._default_font_size + + # print(font_size, self.output.scale) + + # since the text background is dark, we don't want the text to be dark + # color = np.maximum(list(mplc.to_rgb(color)), 0.2) + # color[np.argmax(color)] = max(0.8, np.max(color)) + #luyao# + color = 'w' + # font_size = 7.0 + x, y = position + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + # family="sans-serif", + family="monospace", + # family="serif", + #luyao# + bbox={"facecolor": "black", "alpha": 0.0, "pad": 0.0, "edgecolor": "none"}, + # bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + # verticalalignment="top", + verticalalignment="bottom", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + + rotation=rotation, + #luyao + # fontweight='light' + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. 
Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. + """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + # linewidth = max(self._default_font_size / 16, 1) + # linewidth = max(self._default_font_size / 4, 1) + #luyao# + edge_color=[0.196,0.80,0.196] + alpha = 1.0 + linewidth = 0.7 + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. + """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. + """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. 
+ y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. + linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn in the object's center of mass. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + area_threshold (float): a connected component small than this will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None and has_valid_segment: + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. 
+ for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + #luyao# + # edge_color = [] + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + #luyao# qudiaomaskyanse + # edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. 
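A quick numeric check of the `_change_color_brightness` heuristic used for label text: with `brightness_factor=0.7`, pure red is moved 70% of the way toward white in HLS lightness. The tiny black image is only there to construct a `Visualizer` instance.

```python
import numpy as np
from detectron2.utils.visualizer import Visualizer

vis = Visualizer(np.zeros((32, 32, 3), dtype=np.uint8))
lighter = vis._change_color_brightness((1.0, 0.0, 0.0), brightness_factor=0.7)
print(lighter)   # approximately (1.0, 0.7, 0.7)
```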
+ """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. + """ + return self.output + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = list(map(int, pts)) + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] \ No newline at end of file diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer_chn.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer_chn.py new file mode 100644 index 0000000000000000000000000000000000000000..ec1e7a4327fad2f573e16de435bb64466ba000c2 --- /dev/null +++ b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer_chn.py @@ 
-0,0 +1,1376 @@ +# Edit by Yao Lu +# +# Copyright (c) Facebook, Inc. and its affiliates. +import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .colormap import random_color +from shapely.geometry import * +import pickle +import matplotlib.font_manager as mfm + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +def py_cpu_pnms(dets, scores, thresh): + pts = dets + # for i in xrange(dets.shape[0]): + # pts.append([[int(bbox[i, 0]) + info_bbox[i, j], int(bbox[i, 1]) + info_bbox[i, j+1]] for j in xrange(0,28,2)]) + scores = np.array(scores) + order = scores.argsort()[::-1] + areas = np.zeros(scores.shape) + order = scores.argsort()[::-1] + inter_areas = np.zeros((scores.shape[0], scores.shape[0])) + for il in range(len(pts)): + poly = Polygon(pts[il]).buffer(0.001) + areas[il] = poly.area + for jl in range(il, len(pts)): + polyj = Polygon(pts[jl].tolist()).buffer(0.001) + inS = poly.intersection(polyj) + try: + inter_areas[il][jl] = inS.area + except: + import pdb;pdb.set_trace() + inter_areas[jl][il] = inS.area + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None and class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + # labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + #luyao + labels = ["{}.{:.0f}".format(l, s * 100) for l, s in zip(labels, scores)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3). + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + # Need to imshow this first so that other patches can be drawn on top + ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + self.fig = fig + self.ax = ax + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + # self.fig.savefig(filepath[:-4]+'.svg', format='svg') + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, implement custom drawing functions with the primitive + methods instead. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): image metadata. + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
+ """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + with open('chn_cls_list.txt', 'rb') as fp: + self.CTLABELS = pickle.load(fp) + + def draw_instance_predictions(self, predictions, path): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + #luyao# + # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + rec = predictions.pred_rec if predictions.has("pred_rec") else None + rec_score = predictions.pred_rec_score if predictions.has("pred_rec_score") else None + #luyao# + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + # masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + #luyao# + alpha = 0.8 + else: + colors = None + alpha = 0.77 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + alpha = 0.3 + + self.overlay_instances( + rec=rec, + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + scores=scores, + path=path, + rec_score = rec_score + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg_predictions( + self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 + ): + """ + Draw panoptic prediction results on an image. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image(pred.non_empty_mask()) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + labels = [x["category_id"] for x in annos] + colors = None + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels + ] + names = self.metadata.get("thing_classes", None) + if names: + labels = [names[i] for i in labels] + labels = [ + "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") + for i, a in zip(labels, annos) + ] + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + assert "segments_info" in dic + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + segments_info = dic["segments_info"] + if pan_seg is not None: + pan_seg = torch.Tensor(pan_seg) + self.draw_panoptic_seg_predictions(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + rec=None, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + scores, + path, + rec_score, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + rec = rec + def _ctc_decode_recognition(rec): + #CTLABELS = "_0123456789abcdefghijklmnopqrstuvwxyz" + # CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + # ctc decoding + s = '' + for c in rec: + c = int(c) + if c < 5461: + s += str(chr(self.CTLABELS[c])) + elif c == 5462: + s += u'' + + return s + num_instances = None + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. 
+ boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + rec = [rec[idx] for idx in sorted_idxs] if rec is not None else None + # rec_score = [rec_score[idx] for idx in sorted_idxs] if rec is not None else None + scores = [scores[idx] for idx in sorted_idxs] if scores is not None else None + # assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + #luyao# + assigned_colors = [[0,113.985,118.955],[216.75,82.875,24.99],[236.895, 176.97, 31.875],[125.97, 46.92, 141.78],[118.83, 171.87, 47.94],[76.755, 189.975, 237.915],[161.925, 19.89, 46.92],\ + [255,140,0 ],[70,130,180 ],[128,128,0 ],[205,92,92 ],[128,0,128 ],[255,182,193],[255,255,0],[105,105,105],[0,255,255],[0,255,0 ],\ + [210,180,140],[255,0,0 ],[0,139,139],[255,0,255],[127,255,0],[75,0,130],[32,178,170],[255,215,0],[219,112,147],[148,0,211 ],\ + [100,149,237],[175,238,238 ],[143,188,143],[255,255,224 ],[244,164,96],[188,143,143],[192,192,192 ],[220,20,60],[218,112,214],[147,112,219]] + rec = [_ctc_decode_recognition(rrec) for rrec in rec] + # assigned_colors = [[1,140/255,0],[30/255,144/255,1],[148/255,0,211/255],[0,1,1],[1,0,0],\ + # [30/255,143/255,1],[0.94,0.5,0.5],[1,1,0],[0.5,0.5,0],[0.823,0.412,0.117],[0.58,0,0.827],[0.5,0,0]\ + # ,[0.82,0.41,0.12],[0.41,0.41,0.41],[0,0.54,0.54],[0.75,0.25,0.65],[0.2,0.6,0.8],[0.74,0,0.3],[0,1.0,0.4],[1,0.5,0.5],[0.5,0.5,1]\ + # ,[0.6,0,1],[0.56,0.56,0.3],[0,1,0],[1.0,0.0,0.4],[0.0,1.0,0.4],[0.0,0.5,1.0],[1,215/255,0]] + poly = [] + alpha = 0.4 + for i in range(num_instances): + if masks is not None: + poly.append(masks[i].polygons[0].astype(int).reshape(-1,2)) + keep = py_cpu_pnms(poly,scores,0.5) + for i in range(num_instances): + # if rec[i] == ' ': + # continue + if i not in keep: + continue + # color = assigned_colors[i] + # print(i) + color_ = assigned_colors[i%len(assigned_colors)] + color = [x/255 for x in color_] + # if boxes is not None: + # self.draw_box(boxes[i], edge_color=color) + #luyao + # alpha = 0.6 + H, W, _ = self.img.shape + if masks is not None: + for segment in masks[i].polygons: + segment = polygon2rbox(segment, H, W) + segment = np.array(segment) + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + if labels is not None: + # first get a box + if boxes is not None: + #luyao# + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + elif masks is not None: + # skip small mask without polygon + if len(masks[i].polygons) == 0: + continue + + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. 
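
The de-duplication step above relies on `py_cpu_pnms`, which keeps detections in descending score order and drops any polygon whose IoU with an already-kept polygon is too high. A minimal standalone sketch of that idea, assuming only `numpy` and `shapely` are available (the function and variable names below are illustrative, not part of the patched file):

```python
import numpy as np
from shapely.geometry import Polygon

def polygon_nms(polygons, scores, iou_thresh=0.5):
    """Greedy NMS over polygons: keep by descending score, drop high-IoU overlaps."""
    polys = [Polygon(p).buffer(0.001) for p in polygons]  # tiny buffer avoids invalid geometries
    keep = []
    for idx in np.argsort(np.asarray(scores))[::-1]:
        cand = polys[idx]
        ok = True
        for k in keep:
            inter = cand.intersection(polys[k]).area
            iou = inter / (cand.area + polys[k].area - inter + 1e-9)
            if iou > iou_thresh:
                ok = False
                break
        if ok:
            keep.append(int(idx))
    return keep

# Two heavily overlapping quads and one distant quad -> indices [0, 2] survive.
quads = [
    [(0, 0), (10, 0), (10, 10), (0, 10)],
    [(1, 1), (11, 1), (11, 11), (1, 11)],
    [(50, 50), (60, 50), (60, 60), (50, 60)],
]
print(polygon_nms(quads, [0.9, 0.8, 0.7]))
```
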
+ # for small objects, draw text at the side to avoid occlusion + + instance_area = (y1 - y0) * (x1 - x0) + # print(x0,' ',x1,' ',y0,' ',y1,' ',self.output.height,' ', self.output.width) + #luyao# + if y0<5: + text_pos = ((x0+x1)//2,(y0+y1)//2) + #luyao# + # if ( + # instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + # or y1 - y0 < 40 * self.output.scale + # ): + # if y1 >= self.output.height - 5: + # text_pos = (x1, y0) + # else: + # text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 1.0 + * self._default_font_size + ) + self.draw_text( + # labels[i], + # '', + rec[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. 
+ """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + # draw keypoint + x, y, prob = keypoint + if prob > _KEYPOINT_THRESHOLD: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0 + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. + """ + if not font_size: + font_size = self._default_font_size + + # print(font_size, self.output.scale) + + # since the text background is dark, we don't want the text to be dark + # color = np.maximum(list(mplc.to_rgb(color)), 0.2) + # color[np.argmax(color)] = max(0.8, np.max(color)) + #luyao# + color = 'w' + # font_size = 7.0 + x, y = position + font_path = "simsun.ttc" + prop = mfm.FontProperties(fname=font_path) + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + # family="sans-serif", + family="monospace", + # family="serif", + #luyao# + bbox={"facecolor": "black", "alpha": 0.0, "pad": 0.0, "edgecolor": "none"}, + # bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + # verticalalignment="top", + verticalalignment="bottom", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + + rotation=rotation, + fontproperties=prop, + #luyao + # fontweight='light' + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. 
Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. + """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + # linewidth = max(self._default_font_size / 16, 1) + # linewidth = max(self._default_font_size / 4, 1) + #luyao# + edge_color=[0.196,0.80,0.196] + alpha = 1.0 + linewidth = 0.7 + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. + """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. 
+ """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. + y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. + linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn in the object's center of mass. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + area_threshold (float): a connected component small than this will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None and has_valid_segment: + # TODO sometimes drawn on wrong objects. the heuristics here can improve. 
+ lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + #luyao# + # edge_color = [] + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + #luyao# qudiaomaskyanse + # edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. 
Each value in the tuple is in the [0.0, 1.0] range. + """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. + """ + return self.output + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = list(map(int, pts)) + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] diff --git a/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer_vintext.py b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer_vintext.py new file mode 100644 index 0000000000000000000000000000000000000000..175c5733c67383ab78bb8fa36bcade3736214fc6 --- /dev/null +++ 
b/src/sts/build/lib.linux-x86_64-cpython-38/detectron2/utils/visualizer_vintext.py @@ -0,0 +1,1544 @@ +# Edit by Yao Lu +# +# Copyright (c) Facebook, Inc. and its affiliates. +import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .colormap import random_color +from shapely.geometry import * +import matplotlib.font_manager as mfm + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +def py_cpu_pnms(dets, scores, thresh): + pts = dets + # for i in xrange(dets.shape[0]): + # pts.append([[int(bbox[i, 0]) + info_bbox[i, j], int(bbox[i, 1]) + info_bbox[i, j+1]] for j in xrange(0,28,2)]) + scores = np.array(scores) + order = scores.argsort()[::-1] + areas = np.zeros(scores.shape) + order = scores.argsort()[::-1] + inter_areas = np.zeros((scores.shape[0], scores.shape[0])) + for il in range(len(pts)): + poly = Polygon(pts[il]).buffer(0.001) + areas[il] = poly.area + for jl in range(il, len(pts)): + polyj = Polygon(pts[jl].tolist()).buffer(0.001) + inS = poly.intersection(polyj) + try: + inter_areas[il][jl] = inS.area + except: + import pdb;pdb.set_trace() + inter_areas[jl][il] = inS.area + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None and class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + # labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + #luyao + labels = ["{}.{:.0f}".format(l, s * 100) for l, s in zip(labels, scores)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3). + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + # Need to imshow this first so that other patches can be drawn on top + ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + self.fig = fig + self.ax = ax + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + # self.fig.savefig(filepath[:-4]+'.svg', format='svg') + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, implement custom drawing functions with the primitive + methods instead. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): image metadata. + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
+ """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + + def draw_instance_predictions(self, predictions, path): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + #luyao# + # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + rec = predictions.pred_rec if predictions.has("pred_rec") else None + rec_score = predictions.pred_rec_score if predictions.has("pred_rec_score") else None + #luyao# + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + # masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + #luyao# + alpha = 0.8 + else: + colors = None + alpha = 0.77 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + alpha = 0.3 + + self.overlay_instances( + rec=rec, + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + scores=scores, + path=path, + rec_score = rec_score + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. 
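+
+        Example (illustrative sketch; assumes ``v`` is a Visualizer built with
+        metadata that defines ``stuff_classes``, and the label map below is random
+        noise rather than real model output)::
+
+            import numpy as np
+            sem_seg = np.random.randint(0, 3, (480, 640))   # (H, W) integer label map
+            out = v.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5)
+            out.save("/tmp/sem_seg.png")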
+ """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg_predictions( + self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 + ): + """ + Draw panoptic prediction results on an image. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image(pred.non_empty_mask()) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. 
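+
+        Example (illustrative sketch of the expected input format; the single
+        annotation is made up). Note that in this modified copy
+        :meth:`overlay_instances` requires extra keyword arguments
+        (``scores``, ``path``, ``rec_score``) that this method does not pass,
+        so the call below may fail until that signature is reconciled::
+
+            from detectron2.structures import BoxMode
+            dic = {
+                "annotations": [
+                    {"bbox": [40, 50, 200, 180],
+                     "bbox_mode": BoxMode.XYXY_ABS,
+                     "category_id": 0},
+                ]
+            }
+            out = v.draw_dataset_dict(dic)   # ``v`` is a Visualizer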
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + labels = [x["category_id"] for x in annos] + colors = None + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels + ] + names = self.metadata.get("thing_classes", None) + if names: + labels = [names[i] for i in labels] + labels = [ + "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") + for i, a in zip(labels, annos) + ] + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + assert "segments_info" in dic + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + segments_info = dic["segments_info"] + if pan_seg is not None: + pan_seg = torch.Tensor(pan_seg) + self.draw_panoptic_seg_predictions(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + rec=None, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + scores, + path, + rec_score, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. 
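+
+        Note (specific to this modified copy): ``scores``, ``path`` and ``rec_score``
+        are required keyword-only arguments here, unlike upstream detectron2, and
+        ``rec``, when given, appears to hold per-instance text recognitions that are
+        decoded and drawn in place of ``labels``. Illustrative sketch of the accepted
+        mask formats (the arrays are made up; nothing is drawn here)::
+
+            import numpy as np
+            H, W = 480, 640
+            bitmask = np.zeros((H, W), dtype=np.uint8)
+            bitmask[100:200, 150:300] = 1
+            masks_as_bitmasks = [bitmask]       # list[ndarray] of (H, W) binary masks
+            square = np.array([150, 100, 300, 100, 300, 200, 150, 200], dtype=np.float64)
+            masks_as_polygons = [[square]]      # list[list[ndarray]], x0,y0,...,xn,yn per polygon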
+ """ + rec = rec + def _ctc_decode_recognition(rec): + # CTLABELS = "_0123456789abcdefghijklmnopqrstuvwxyz" + # CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + CTLABELS = [ + " ", + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "[", + "\\", + "]", + "^", + "_", + "`", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "{", + "|", + "}", + "~", + "ˋ", + "ˊ", + "﹒", + "ˀ", + "˜", + "ˇ", + "ˆ", + "˒", + "‑", + ] + # ctc decoding + last_char = False + s = '' + for c in rec: + c = int(c) + if 0= self.output.height - 5: + # text_pos = (x1, y0) + # else: + # text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 1.0 + * self._default_font_size + ) + self.draw_text( + # labels[i], + # '', + rec[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. 
This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. + """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + # draw keypoint + x, y, prob = keypoint + if prob > _KEYPOINT_THRESHOLD: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0 + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. 
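+
+        Example (illustrative sketch; note this modified copy overrides ``color`` and
+        always draws white text, and loads the Vietnamese font file "VNFREE.ttf"
+        relative to the working directory)::
+
+            out = v.draw_text("Phở Hòa", (120, 80), font_size=14)   # ``v`` is a Visualizer
+            out.save("/tmp/text_demo.png")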
+ """ + if not font_size: + font_size = self._default_font_size + + # print(font_size, self.output.scale) + + # since the text background is dark, we don't want the text to be dark + # color = np.maximum(list(mplc.to_rgb(color)), 0.2) + # color[np.argmax(color)] = max(0.8, np.max(color)) + #luyao# + color = 'w' + # font_size = 7.0 + x, y = position + font_path = "VNFREE.ttf" + prop = mfm.FontProperties(fname=font_path) + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + # family="sans-serif", + family="monospace", + # family="serif", + #luyao# + bbox={"facecolor": "black", "alpha": 0.0, "pad": 0.0, "edgecolor": "none"}, + # bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + # verticalalignment="top", + verticalalignment="bottom", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + + rotation=rotation, + fontproperties=prop + #luyao + # fontweight='light' + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. + """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + # linewidth = max(self._default_font_size / 16, 1) + # linewidth = max(self._default_font_size / 4, 1) + #luyao# + edge_color=[0.196,0.80,0.196] + alpha = 1.0 + linewidth = 0.7 + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. 
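+
+        Example (illustrative sketch; the box values are made up)::
+
+            # a 200x80 box centred at (320, 240), rotated 30 degrees CCW
+            out = v.draw_rotated_box_with_label(
+                (320, 240, 200, 80, 30), edge_color="r", label="signboard"
+            )   # ``v`` is a Visualizer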
+ """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. + """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. + y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. + linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn in the object's center of mass. + alpha (float): blending efficient. Smaller values lead to more transparent masks. 
+ area_threshold (float): a connected component small than this will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None and has_valid_segment: + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + #luyao# + # edge_color = [] + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + #luyao# qudiaomaskyanse + # edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. 
+ + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. + """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. 
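+
+        Example (illustrative sketch; ``v`` is a Visualizer on which drawing calls
+        have already been made, and the output path is hypothetical)::
+
+            import cv2
+            result = v.get_output().get_image()   # (H, W, 3) uint8 RGB
+            cv2.imwrite("/tmp/result.png", cv2.cvtColor(result, cv2.COLOR_RGB2BGR))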
+ """ + return self.output + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = list(map(int, pts)) + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] + +dictionary = "aàáạảãâầấậẩẫăằắặẳẵAÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪeèéẹẻẽêềếệểễEÈÉẸẺẼÊỀẾỆỂỄoòóọỏõôồốộổỗơờớợởỡOÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠiìíịỉĩIÌÍỊỈĨuùúụủũưừứựửữƯỪỨỰỬỮUÙÚỤỦŨyỳýỵỷỹYỲÝỴỶỸ" + + +def make_groups(): + groups = [] + i = 0 + while i < len(dictionary) - 5: + group = [c for c in dictionary[i : i + 6]] + i += 6 + groups.append(group) + return groups + + +groups = make_groups() + +TONES = ["", "ˋ", "ˊ", "﹒", "ˀ", "˜"] +SOURCES = ["ă", "â", "Ă", "Â", "ê", "Ê", "ô", "ơ", "Ô", "Ơ", "ư", "Ư", "Đ", "đ"] +TARGETS = ["aˇ", "aˆ", "Aˇ", "Aˆ", "eˆ", "Eˆ", "oˆ", "o˒", "Oˆ", "O˒", "u˒", "U˒", "D-", "d‑"] + + +def correct_tone_position(word): + word = word[:-1] + if len(word) < 2: + pass + first_ord_char = "" + second_order_char = "" + for char in word: + for group in groups: + if char in group: + second_order_char = first_ord_char + first_ord_char = group[0] + if word[-1] == first_ord_char and second_order_char != "": + pair_chars = ["qu", "Qu", "qU", "QU", "gi", "Gi", "gI", "GI"] + for pair in pair_chars: + if pair in word and second_order_char in ["u", "U", "i", "I"]: + return first_ord_char + return second_order_char + return first_ord_char + + +def decoder(recognition): + for char in TARGETS: + recognition = recognition.replace(char, SOURCES[TARGETS.index(char)]) + if len(recognition) < 1: + return recognition + if recognition[-1] in TONES: + if len(recognition) < 2: + return recognition + replace_char = correct_tone_position(recognition) + tone = recognition[-1] + recognition = recognition[:-1] + for group in groups: + if replace_char in group: + recognition = recognition.replace(replace_char, group[TONES.index(tone)]) + return recognition diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/.ninja_deps b/src/sts/build/temp.linux-x86_64-cpython-38/.ninja_deps new file mode 100644 index 0000000000000000000000000000000000000000..ac1d2a7b0171f695d3cdf173ffb42e3cba4cdfd1 Binary files /dev/null and b/src/sts/build/temp.linux-x86_64-cpython-38/.ninja_deps differ diff --git 
a/src/sts/build/temp.linux-x86_64-cpython-38/.ninja_log b/src/sts/build/temp.linux-x86_64-cpython-38/.ninja_log new file mode 100644 index 0000000000000000000000000000000000000000..986bbf3c9a9572f0242a8852e3c37b0f00118ea8 --- /dev/null +++ b/src/sts/build/temp.linux-x86_64-cpython-38/.ninja_log @@ -0,0 +1,99 @@ +# ninja log v5 +1 665 1697438407310132778 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o ed6c45e0575695f4 +1 6062 1697438412702132765 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o eaedb63398f5945f +2 7219 1697438413862132761 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o ff5f6565993041dd +1 8171 1697438414814132759 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 718bc147426618e4 +1 9851 1697438416490132754 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o cac22bcaad476577 +1 12252 1697438418898132746 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o 18d4d3995014baf1 +1 12265 1697438418910132746 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 61d71d2decaffc65 +2 12733 1697438419378132745 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o 7a530b651bdbdb66 +2 13266 1697438419910132742 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o cf7727a2b5d8c09c +1 14949 1697438421594132737 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o 1573fe35a3fee66 +2 20848 1697438427482132713 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o 5e36f32603dbe3ea +2 556 1697438709406127420 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o fbe7258ec5f19b59 +1 5764 1697438714606127249 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o be881126815a16f0 +1 6520 1697438715366127223 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 4390de017a5990be +2 9393 
1697438718238127127 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o 5f7de5d0ed3b88a +1 9411 1697438718254127127 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o fc648de0b09e4645 +1 11167 1697438720010127067 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 2392e0abc8d5cb5 +2 12557 1697438721402127020 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o cd9cd25b6c4eb467 +1 12610 1697438721454127018 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o e955bbb72ea83b83 +2 13630 1697438722474126984 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o 9b57a42035331b34 +2 14596 1697438723442126950 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o d6f0a857f027aeec +2 20789 1697438729626126737 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o f1b440d013788d2d +2 443 1697438804234123852 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o ed6c45e0575695f4 +2 6526 1697438810314123595 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o eaedb63398f5945f +2 6869 1697438810658123580 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 718bc147426618e4 +1 8729 1697438812518123501 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o cac22bcaad476577 +2 9284 1697438813074123477 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o ff5f6565993041dd +2 12029 1697438815818123359 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 61d71d2decaffc65 +1 12238 1697438816026123350 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o 18d4d3995014baf1 +2 12512 1697438816302123338 
/home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o 7a530b651bdbdb66 +2 14868 1697438818654123236 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o cf7727a2b5d8c09c +2 15474 1697438819262123210 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o 1573fe35a3fee66 +2 19984 1697438823762123013 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o 5e36f32603dbe3ea +1 447 1697438824902122963 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o fbe7258ec5f19b59 +1 5219 1697438829670122753 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o be881126815a16f0 +1 5686 1697438830138122732 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 4390de017a5990be +1 6722 1697438831174122686 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o 5f7de5d0ed3b88a +0 7982 1697438832434122630 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o fc648de0b09e4645 +0 10232 1697438834682122529 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o e955bbb72ea83b83 +1 11694 1697438836146122463 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o cd9cd25b6c4eb467 +1 12150 1697438836602122442 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 2392e0abc8d5cb5 +1 13342 1697438837794122388 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o 9b57a42035331b34 +1 13785 1697438838238122369 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o d6f0a857f027aeec +1 19968 1697438844410122088 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o f1b440d013788d2d +2 507 1697438876002120587 
/home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o ed6c45e0575695f4 +2 6486 1697438881978120295 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o eaedb63398f5945f +2 6833 1697438882326120277 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 718bc147426618e4 +3 7529 1697438883022120243 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o ff5f6565993041dd +1 10110 1697438885602120115 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o cac22bcaad476577 +2 11904 1697438887398120025 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 61d71d2decaffc65 +3 12604 1697438888098119990 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o 7a530b651bdbdb66 +1 12981 1697438888474119972 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o 18d4d3995014baf1 +3 14821 1697438890314119879 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o 1573fe35a3fee66 +3 15223 1697438890718119860 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o cf7727a2b5d8c09c +3 21189 1697438896674119559 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o 5e36f32603dbe3ea +2 544 1697438897910119496 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o fbe7258ec5f19b59 +2 6493 1697438903854119193 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o be881126815a16f0 +2 7314 1697438904678119150 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 4390de017a5990be +3 8672 1697438906038119080 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o 5f7de5d0ed3b88a +1 8689 1697438906050119079 
/home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o fc648de0b09e4645 +2 11583 1697438908946118930 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 2392e0abc8d5cb5 +1 11790 1697438909154118919 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o e955bbb72ea83b83 +2 12164 1697438909526118900 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o cd9cd25b6c4eb467 +3 12674 1697438910034118873 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o 9b57a42035331b34 +2 14473 1697438911838118779 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o d6f0a857f027aeec +3 19266 1697438916618118529 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o f1b440d013788d2d +1 4692 1697463539651223601 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o 2af7cda4c5b76e35 +1 5297 1697463540259223196 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 2ca0db8e9fda5194 +1 5561 1697463540523223020 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o 17454622576b22b +1 6536 1697463541495222374 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o 9248c7d6c0a647be +1 16419 1697463551371215803 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o 104d6b043140ac1a +1 4464 1697463556227212575 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o d6e6c5d411222361 +1 4964 1697463556731212240 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 36b83fdc1499f45f +2 5329 1697463557099211996 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o 33a126430b9cf02a +1 6335 1697463558103211329 
/home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o d5adf15ec37abd39 +2 16211 1697463567967204779 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o 6ced340a63903477 +2 503 1697465147878323864 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o ed6c45e0575695f4 +2 7457 1697465154822321241 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o eaedb63398f5945f +2 7773 1697465155142321120 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 718bc147426618e4 +3 8300 1697465155670320921 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o ff5f6565993041dd +1 9413 1697465156782320500 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o cac22bcaad476577 +2 12700 1697465160070319258 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o 18d4d3995014baf1 +2 12881 1697465160250319190 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 61d71d2decaffc65 +2 13134 1697465160506319094 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o 7a530b651bdbdb66 +3 13899 1697465161270318805 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o cf7727a2b5d8c09c +2 15004 1697465162374318388 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o 1573fe35a3fee66 +3 20866 1697465168226316177 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o 5e36f32603dbe3ea +4 545 1697465169486315701 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o fbe7258ec5f19b59 +4 5458 1697465174394313848 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o be881126815a16f0 +4 6492 1697465175430313456 
/home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o 5f7de5d0ed3b88a +2 7748 1697465176682312984 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o fc648de0b09e4645 +3 7856 1697465176794312941 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o 4390de017a5990be +4 11163 1697465180102311692 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o cd9cd25b6c4eb467 +3 11319 1697465180254311635 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o 2392e0abc8d5cb5 +3 11936 1697465180874311400 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o e955bbb72ea83b83 +4 13348 1697465182286310867 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o 9b57a42035331b34 +4 15194 1697465184134310170 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o d6f0a857f027aeec +4 18855 1697465187782308792 /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o f1b440d013788d2d diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/build.ninja b/src/sts/build/temp.linux-x86_64-cpython-38/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..d39412638310346e43aa002498033e4bc21151d2 --- /dev/null +++ b/src/sts/build/temp.linux-x86_64-cpython-38/build.ninja @@ -0,0 +1,38 @@ +ninja_required_version = 1.3 +cxx = c++ +nvcc = /usr/local/cuda/bin/nvcc + +cflags = -pthread -B /home/kienvs/anaconda3/envs/track/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include/TH -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/kienvs/anaconda3/envs/track/include/python3.8 -c +post_cflags = -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_C -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_C -D_GLIBCXX_USE_CXX11_ABI=0 
+cuda_cflags = -DWITH_CUDA -I/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include/TH -I/home/kienvs/anaconda3/envs/track/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/kienvs/anaconda3/envs/track/include/python3.8 -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -O3 -DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_C -D_GLIBCXX_USE_CXX11_ABI=0 -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_C -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 -std=c++14 +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags + + + +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o: compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o: cuda_compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o: compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o: cuda_compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o: compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.cpp +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o: cuda_compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.cu +build 
/home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o: cuda_compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.cu +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o: cuda_compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o: compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o: cuda_compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu +build /home/kienvs/sources/poi_engineering_api/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o: compile /home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.cpp + + + + + diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o new file mode 100644 index 0000000000000000000000000000000000000000..b70c4d472a4bb6f41bd449f67bbac1d90c154c30 --- /dev/null +++ b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262c7d77f8245158adf56743933e594a9bbb04a3ffc2d449ac3dd4df3d47ea86 +size 7424304 diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..8ceb10359c2c072bbc7bb86c3847e6aae0b434a5 Binary files /dev/null and b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.o differ diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o new file mode 100644 index 0000000000000000000000000000000000000000..b4c5ba532fbb0c26d7dd09ea906fa3b0e7d50751 --- /dev/null +++ b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:80ef9365b14479c0fc957be6801e9f33e1035b636c1035f167e73d2418471b87 +size 4645736 diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..5f23d4225a56f901d25fcbcf04a4e33e3778f694 Binary files /dev/null and b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.o differ diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o new file mode 100644 index 0000000000000000000000000000000000000000..aba2f042b5566b692c8aa3a59785a769b2716c42 --- /dev/null +++ b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef315008e37cbf97203670e6b686602cf38de5122be696f9ed126fe8ea07a4d2 +size 7364248 diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o new file mode 100644 index 0000000000000000000000000000000000000000..b25d1528690f0f93b1baa39bd1827e21fcff04e6 Binary files /dev/null and b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.o differ diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..e6d3820865b57fc07a6d0c62d2986ecfe86a666c Binary files /dev/null and b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.o differ diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o new file mode 100644 index 0000000000000000000000000000000000000000..5af0e2e2cbb1662f911baa92a905eaa39bdb6aea Binary files /dev/null and b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.o differ diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o new file mode 100644 index 
0000000000000000000000000000000000000000..56bee24a32ccbbc983c8613dd90b86c52e2d52af --- /dev/null +++ b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40402efd1713850410f95784d8dbe2b31697e941916eb82a077c6c2d6ff4115 +size 5183664 diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o new file mode 100644 index 0000000000000000000000000000000000000000..1d6dc6b3ad8a53c46ee960680d5883d577b6cb0a Binary files /dev/null and b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.o differ diff --git a/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o new file mode 100644 index 0000000000000000000000000000000000000000..9a210adea098a87ee9ac495474452dce170e0d64 --- /dev/null +++ b/src/sts/build/temp.linux-x86_64-cpython-38/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086f867072a20b6f69340938537b4df361663be4a603d0c3c6a95f6c194a7130 +size 22413352 diff --git a/src/sts/configs/Base-RCNN-C4.yaml b/src/sts/configs/Base-RCNN-C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad --- /dev/null +++ b/src/sts/configs/Base-RCNN-C4.yaml @@ -0,0 +1,18 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RPN: + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "Res5ROIHeads" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/configs/Base-RCNN-DilatedC5.yaml b/src/sts/configs/Base-RCNN-DilatedC5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27 --- /dev/null +++ b/src/sts/configs/Base-RCNN-DilatedC5.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RESNETS: + OUT_FEATURES: ["res5"] + RES5_DILATION: 2 + RPN: + IN_FEATURES: ["res5"] + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["res5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/configs/Base-RCNN-FPN.yaml b/src/sts/configs/Base-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e020f2e7b2f26765be317f907126a1556621abf --- /dev/null +++ b/src/sts/configs/Base-RCNN-FPN.yaml @@ -0,0 +1,42 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: 
"build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/configs/Base-RetinaNet.yaml b/src/sts/configs/Base-RetinaNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8 --- /dev/null +++ b/src/sts/configs/Base-RetinaNet.yaml @@ -0,0 +1,25 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + BACKBONE: + NAME: "build_retinanet_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] + FPN: + IN_FEATURES: ["res3", "res4", "res5"] + RETINANET: + IOU_THRESHOLDS: [0.4, 0.5] + IOU_LABELS: [0, -1, 1] + SMOOTH_L1_LOSS_BETA: 0.0 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/src/sts/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..773ac10e87c626760d00d831bf664ce9ff073c49 --- /dev/null +++ b/src/sts/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + LOAD_PROPOSALS: True + RESNETS: + DEPTH: 50 + PROPOSAL_GENERATOR: + NAME: "PrecomputedProposals" +DATASETS: + TRAIN: ("coco_2017_train",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) + TEST: ("coco_2017_val",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +DATALOADER: + # proposals are part of the dataset_dicts, and take a lot of RAM + NUM_WORKERS: 2 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db142cd671c1841b4f64cf130bee7f7954ecdd28 --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bceb6b343618d8cd9a6c414ff9eb86ab31cc230a --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57a098f53ee8c54ecfa354cc96efefd890dc1b72 --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f96130105c3ba6ab393e0932870903875f5cb732 --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc51bce390a85ee3529ffdcebde05748e1646be0 --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fe96f57febdac5790ea4cec168fa4b97ac4807a --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33fadeb87d1ef67ab2b55926b9a652ab4ac4a27d --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3262019a1211b910d3b371569199ed1afaacf6a4 --- /dev/null +++ 
b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41395182bf5c9dd8ab1241c4414068817298d554 --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/src/sts/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c9b5ab77157baa581d90d9847c045c19ed6ffa3 --- /dev/null +++ b/src/sts/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: False + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/src/sts/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4abb1b9a547957aa6afc0b29129e00f89cf98d59 --- /dev/null +++ b/src/sts/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/src/sts/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a24ce3a9a108a8792e18c8aabfb7b712f0d3725 --- /dev/null +++ b/src/sts/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/src/sts/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b5412d4a7aef1d6c3f7c1e34f94007de639b833 --- /dev/null +++ b/src/sts/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/src/sts/configs/COCO-Detection/rpn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e04821156b0376ba5215d5ce5b7010a36b43e6a1 --- /dev/null +++ b/src/sts/configs/COCO-Detection/rpn_R_50_C4_1x.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + RPN: + PRE_NMS_TOPK_TEST: 12000 + POST_NMS_TOPK_TEST: 2000 diff --git a/src/sts/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml 
b/src/sts/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc9c95203b1c3c9cd9bb9876bb8d9a5dd9b31d9a --- /dev/null +++ b/src/sts/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + RPN: + POST_NMS_TOPK_TEST: 2000 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a94cc45a0f2aaa8c92e14871c553b736545e327 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67b70cf4be8c19f5dc735b6f55a8690698f34b69 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1935a302d2d0fa7f69553b3fd50b5a7082c6c0d1 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9aeb4eac38026dbb867e799f9fd3a8d8eb3af80 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38ed867d897dfec839cbcf11a2e2dc8abb92f07c --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b13eefab2a049c48d94d5051c82ceb6dbde40579 --- /dev/null +++ 
b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d401016358f967f6619d88b1c9bd5673a1cdeba8 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d50fb866ca7811a87b42555c7213f88e00bf6df1 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bec680ee17a474fefe527b7b79d26266e75c09f0 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + RPN: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 2.0 + ROI_BOX_HEAD: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 10.0 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be7d06b8e0f032ee7fcaabd7c122158518489fd2 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d14c63f74383bfc308750f51d51344398b02a239 --- /dev/null +++ b/src/sts/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/src/sts/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e03944a42d2e497da5ceca17c8fda797dac3f82 --- /dev/null +++ 
b/src/sts/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss + RPN: + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. + POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9309535c57a1aa7d23297aac80a9bd78a6c79fcc --- /dev/null +++ b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bf85cf745b53b3e7ab28fe94b7f4f9e7fe6e335 --- /dev/null +++ b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a07f243f650a497b9372501e3face75194cf0941 --- /dev/null +++ b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4bfa20a98c0a65c6bd60e93b07e8f4b7d92a867 --- /dev/null +++ b/src/sts/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/src/sts/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f00d54b760c2b9271c75643e0a1ab1ffc0d9543a --- /dev/null +++ b/src/sts/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + MASK_ON: True + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: False diff --git a/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml 
b/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e01f6fb31e9b00b1857b7de3b5074184d1f4a21 --- /dev/null +++ b/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6afa2c1cc92495309ed1553a17359fe5d7d6566e --- /dev/null +++ b/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b956b3f673e78649184fe2c50e2700b3f1f14794 --- /dev/null +++ b/src/sts/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/src/sts/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e --- /dev/null +++ b/src/sts/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + # For better, more stable performance initialize from COCO + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" + MASK_ON: True + ROI_HEADS: + NUM_CLASSES: 8 +# This is similar to the setting used in Mask R-CNN paper, Appendix A +# But there are some differences, e.g., we did not initialize the output +# layer using the corresponding classes from COCO +INPUT: + MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 1024 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 +DATASETS: + TRAIN: ("cityscapes_fine_instance_seg_train",) + TEST: ("cityscapes_fine_instance_seg_val",) +SOLVER: + BASE_LR: 0.01 + STEPS: (18000,) + MAX_ITER: 24000 + IMS_PER_BATCH: 8 +TEST: + EVAL_PERIOD: 8000 diff --git a/src/sts/configs/Detectron1-Comparisons/README.md b/src/sts/configs/Detectron1-Comparisons/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a90ed9e433a00b8b9f43961d7a2696d5b9013127 --- /dev/null +++ b/src/sts/configs/Detectron1-Comparisons/README.md @@ -0,0 +1,83 @@ + +Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. + +The differences in implementation details are shared in +[Compatibility with Other Libraries](../../docs/notes/compatibility.md). + +The differences in model zoo's experimental settings include: +* Use scale augmentation during training. This improves AP with lower training cost. +* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may + affect other AP. 
+* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+* Use `ROIAlignV2`. This does not significantly affect AP.
+
+In this directory, we provide a few configs that __do not__ have the above changes.
+They mimic Detectron's behavior as closely as possible,
+and provide a fair comparison of accuracy and speed against Detectron.
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
+| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
+| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |
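As a rough illustration of how one of these comparison configs can be exercised, the sketch below loads `mask_rcnn_R_50_FPN_noaug_1x.yaml` through the standard detectron2 config API and runs a single image through `DefaultPredictor`. This is a hedged sketch, not part of the repository: it assumes the detectron2 copy bundled under `src/sts` is importable as `detectron2`, and the checkpoint path and input image below are placeholders (a matching checkpoint would have to be downloaded separately; the model id column above identifies the corresponding model zoo entry).

```python
# Minimal sketch (assumptions noted above): load a comparison config and
# run inference on one image with the standard detectron2 API.
import cv2
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
# merge_from_file resolves the _BASE_ chain (here ../Base-RCNN-FPN.yaml).
cfg.merge_from_file("src/sts/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml")
cfg.MODEL.WEIGHTS = "checkpoints/mask_rcnn_noaug_1x.pkl"  # placeholder path, not shipped here
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.DEVICE = "cpu"  # remove this line to run on GPU

predictor = DefaultPredictor(cfg)
image = cv2.imread("example.jpg")  # placeholder input image (BGR, as expected by the predictor)
outputs = predictor(image)
print(outputs["instances"].pred_boxes, outputs["instances"].scores)
```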
+
+## Comparisons:
+
+* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron
+  [bug](https://github.com/facebookresearch/Detectron/issues/459) leads to a drop in box AP, which can be
+  compensated for by some parameter tuning.
+* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to a more correct implementation.
+
+For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
diff --git a/src/sts/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/src/sts/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa --- /dev/null +++ b/src/sts/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/src/sts/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808 --- /dev/null +++ b/src/sts/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + RPN: + SMOOTH_L1_BETA: 0.1111 + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. + POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/src/sts/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/src/sts/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6 --- /dev/null +++ b/src/sts/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2.
+ RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + ROI_MASK_HEAD: + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa --- /dev/null +++ b/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64b4caa4ef2b284782367ea702e1ae6653472630 --- /dev/null +++ b/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a --- /dev/null +++ b/src/sts/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,23 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157 --- /dev/null +++ b/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + 
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d --- /dev/null +++ b/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898 --- /dev/null +++ b/src/sts/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,26 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/src/sts/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/src/sts/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8 --- /dev/null +++ b/src/sts/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 diff --git a/src/sts/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/src/sts/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447 --- /dev/null +++ b/src/sts/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: 
(210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/src/sts/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25 --- /dev/null +++ b/src/sts/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml @@ -0,0 +1,36 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 152 + DEFORM_ON_PER_STAGE: [False, True, True, True] + ROI_HEADS: + NAME: "CascadeROIHeads" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NUM_CONV: 8 + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + IMS_PER_BATCH: 128 + STEPS: (35000, 45000) + MAX_ITER: 50000 + BASE_LR: 0.16 +INPUT: + MIN_SIZE_TRAIN: (640, 864) + MIN_SIZE_TRAIN_SAMPLING: "range" + MAX_SIZE_TRAIN: 1440 + CROP: + ENABLED: True +TEST: + EVAL_PERIOD: 2500 diff --git a/src/sts/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246 --- /dev/null +++ b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + CLS_AGNOSTIC_MASK: True diff --git a/src/sts/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232 --- /dev/null +++ b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False diff --git a/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318 --- /dev/null +++ b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3 --- /dev/null +++ b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "GN" + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + ROI_MASK_HEAD: + NORM: 
"GN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b --- /dev/null +++ b/src/sts/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml @@ -0,0 +1,24 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "SyncBN" + STRIDE_IN_1X1: True + FPN: + NORM: "SyncBN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "SyncBN" + ROI_MASK_HEAD: + NORM: "SyncBN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 +TEST: + PRECISE_BN: + ENABLED: True diff --git a/src/sts/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/src/sts/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b --- /dev/null +++ b/src/sts/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml @@ -0,0 +1,26 @@ +# A large PanopticFPN for demo purposes. +# Use GN on backbone to support semantic seg. +# Use Cascade + Deform Conv to improve localization. +_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" + RESNETS: + DEPTH: 101 + NORM: "GN" + DEFORM_ON_PER_STAGE: [False, True, True, True] + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (105000, 125000) + MAX_ITER: 135000 + IMS_PER_BATCH: 32 + BASE_LR: 0.04 diff --git a/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5 --- /dev/null +++ b/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,13 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + # Train from random initialization. + WEIGHTS: "" + # It makes sense to divide by STD when training from scratch + # But it seems to make no difference on the results and C2's models didn't do this. + # So we keep things consistent with C2. + # PIXEL_STD: [57.375, 57.12, 58.395] + MASK_ON: True + BACKBONE: + FREEZE_AT: 0 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. diff --git a/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176 --- /dev/null +++ b/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
diff --git a/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a --- /dev/null +++ b/src/sts/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. diff --git a/src/sts/configs/Misc/semantic_R_50_FPN_1x.yaml b/src/sts/configs/Misc/semantic_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5 --- /dev/null +++ b/src/sts/configs/Misc/semantic_R_50_FPN_1x.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/src/sts/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/src/sts/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea2a6baaebd1a186db18f2904430ffb25901898e --- /dev/null +++ b/src/sts/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/src/sts/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/src/sts/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e554cab18a358a27b630c1ab0c2359666b0e1514 --- /dev/null +++ b/src/sts/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/src/sts/configs/quick_schedules/README.md b/src/sts/configs/quick_schedules/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4e6c82ef3f75a73c7006f33d7c850a0d4781a58f --- /dev/null +++ b/src/sts/configs/quick_schedules/README.md @@ -0,0 +1,8 @@ +These are quick configs for performance or accuracy regression tracking purposes. + +* `*instance_test.yaml`: can train on 2 GPUs. 
They are used to test whether the training can + successfully finish. They are not expected to produce reasonable training results. +* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify + the results are as expected. +* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy + is within the normal range. diff --git a/src/sts/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e --- /dev/null +++ b/src/sts/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] diff --git a/src/sts/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/src/sts/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee --- /dev/null +++ b/src/sts/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896 --- /dev/null +++ b/src/sts/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] diff --git a/src/sts/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/src/sts/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c --- /dev/null +++ b/src/sts/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,15 @@ +_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) + TEST: ("coco_2017_val_100",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457 --- /dev/null +++ b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" +DATASETS: + TEST: ("keypoints_coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] diff --git a/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dd209f693bd0bfdd46a2c9e7e750dede3abc141 --- /dev/null +++ b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,16 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 +DATASETS: + TRAIN: ("keypoints_coco_2017_val_100",) + TEST: ("keypoints_coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce --- /dev/null +++ b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml @@ -0,0 +1,30 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False + LOSS_WEIGHT: 4.0 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0 --- /dev/null +++ b/src/sts/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,28 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + 
MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "value" + CLIP_VALUE: 1.0 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3 --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23 --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + IMS_PER_BATCH: 8 # base uses 16 + WARMUP_FACTOR: 0.33333 + WARMUP_ITERS: 100 + STEPS: (11000, 11600) + MAX_ITER: 12000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: 
"../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7 --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] + AUG: + ENABLED: True + MIN_SIZES: (700, 800) # to save some time diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml @@ -0,0 +1,6 @@ +_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" +MODEL: + ROI_BOX_HEAD: + TRAIN_ON_PRED_BOXES: True +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936 --- /dev/null +++ b/src/sts/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + WARMUP_FACTOR: 0.3333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/src/sts/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/src/sts/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42 --- /dev/null +++ b/src/sts/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" +DATASETS: + TEST: ("coco_2017_val_100_panoptic_separated",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] diff --git a/src/sts/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/src/sts/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed --- /dev/null +++ b/src/sts/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_separated",) + TEST: ("coco_2017_val_100_panoptic_separated",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 1 diff --git a/src/sts/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/src/sts/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3bbf30196cb35434340d4c343cab0c96283cd4f --- /dev/null +++ b/src/sts/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 500 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]] diff --git a/src/sts/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/src/sts/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8 --- /dev/null +++ b/src/sts/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] diff --git a/src/sts/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/src/sts/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9 --- /dev/null +++ b/src/sts/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + 
BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/src/sts/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8 --- /dev/null +++ b/src/sts/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] diff --git a/src/sts/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/src/sts/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809 --- /dev/null +++ b/src/sts/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + STEPS: (30,) + MAX_ITER: 40 + BASE_LR: 0.005 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/src/sts/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7 --- /dev/null +++ b/src/sts/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git a/src/sts/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/src/sts/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642 --- /dev/null +++ b/src/sts/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/src/sts/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/src/sts/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61 --- /dev/null +++ b/src/sts/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_stuffonly",) + TEST: 
("coco_2017_val_panoptic_stuffonly",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 300 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/src/sts/demo/README.md b/src/sts/demo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..133d8d38e5e9f5f44aca92c59f73309e166d7132 --- /dev/null +++ b/src/sts/demo/README.md @@ -0,0 +1,8 @@ + +## Detectron2 Demo + +We provide a command line tool to run a simple demo of builtin configs. +The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md). + +See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-) +for a high-quality demo generated with this tool. diff --git a/src/sts/demo/__pycache__/genericmask.cpython-38.pyc b/src/sts/demo/__pycache__/genericmask.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f200c220bc6e81cbabf02fa8df27d50c9176f16d Binary files /dev/null and b/src/sts/demo/__pycache__/genericmask.cpython-38.pyc differ diff --git a/src/sts/demo/__pycache__/predictor.cpython-38.pyc b/src/sts/demo/__pycache__/predictor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e0c817f73244e13184fc4ac3444ddcb8ae711fe Binary files /dev/null and b/src/sts/demo/__pycache__/predictor.cpython-38.pyc differ diff --git a/src/sts/demo/__pycache__/sts.cpython-38.pyc b/src/sts/demo/__pycache__/sts.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4c7fa1baa0ac02e296cfd8f6476a1058de6bd30 Binary files /dev/null and b/src/sts/demo/__pycache__/sts.cpython-38.pyc differ diff --git a/src/sts/demo/demo.py b/src/sts/demo/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..eb3d0bafdd8665f09d63f6634e2160e0a90d978d --- /dev/null +++ b/src/sts/demo/demo.py @@ -0,0 +1,169 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import argparse +import glob +import multiprocessing as mp +import os +import time +import cv2 +import tqdm + +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.utils.logger import setup_logger + +from predictor import VisualizationDemo + + +# constants +WINDOW_NAME = "COCO detections" + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + # To use demo for Panoptic-DeepLab, please uncomment the following two lines. 
+ # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config # noqa + # add_panoptic_deeplab_config(cfg) + + # ----- + from projects.SWINTS.swints import add_SWINTS_config + add_SWINTS_config(cfg) + # ----- + + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + # Set score_threshold for builtin models + cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold + cfg.freeze() + return cfg + + +def get_parser(): + parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs") + parser.add_argument( + "--config-file", + default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") + parser.add_argument("--video-input", help="Path to video file.") + parser.add_argument( + "--input", + nargs="+", + help="A list of space separated input images; " + "or a single glob pattern such as 'directory/*.jpg'", + ) + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. " + "If not given, will show output in an OpenCV window.", + ) + + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.5, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +if __name__ == "__main__": + mp.set_start_method("spawn", force=True) + args = get_parser().parse_args() + setup_logger(name="fvcore") + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + demo = VisualizationDemo(cfg) + hh = [] + if args.input: + if len(args.input) == 1: + args.input = glob.glob(os.path.expanduser(args.input[0])) + assert args.input, "The input path(s) was not found" + for path in tqdm.tqdm(args.input, disable=not args.output): + # use PIL, to be consistent with evaluation + img = read_image(path, format="BGR") + start_time = time.time() + predictions, visualized_output = demo.run_on_image(img, args.confidence_threshold, path) + logger.info( + "{}: {} in {:.2f}s".format( + path, + "detected {} instances".format(len(predictions["instances"])) + if "instances" in predictions + else "finished", + time.time() - start_time, + ) + ) + if args.output: + if os.path.isdir(args.output): + assert os.path.isdir(args.output), args.output + out_filename = os.path.join(args.output, os.path.basename(path)) + else: + assert len(args.input) == 1, "Please specify a directory with args.output" + out_filename = args.output + visualized_output.save(out_filename) + else: + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) + if cv2.waitKey(0) == 27: + break # esc to quit + elif args.webcam: + assert args.input is None, "Cannot have both --input and --webcam!" + assert args.output is None, "output not yet supported with --webcam!" 
+ cam = cv2.VideoCapture(0) + for vis in tqdm.tqdm(demo.run_on_video(cam, args.confidence_threshold)): + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, vis) + if cv2.waitKey(1) == 27: + break # esc to quit + cam.release() + cv2.destroyAllWindows() + elif args.video_input: + video = cv2.VideoCapture(args.video_input) + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + frames_per_second = video.get(cv2.CAP_PROP_FPS) + num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + basename = os.path.basename(args.video_input) + if args.output: + if os.path.isdir(args.output): + output_fname = os.path.join(args.output, basename) + output_fname = os.path.splitext(output_fname)[0] + ".mkv" + else: + output_fname = args.output + assert not os.path.isfile(output_fname), output_fname + output_file = cv2.VideoWriter( + filename=output_fname, + # some installation of opencv may not support x264 (due to its license), + # you can try other format (e.g. MPEG) + fourcc=cv2.VideoWriter_fourcc(*"mp4v"), # x264 + fps=float(frames_per_second), + frameSize=(width, height), + isColor=True, + ) + assert os.path.isfile(args.video_input) + for vis_frame in tqdm.tqdm(demo.run_on_video(video, args.confidence_threshold), total=num_frames): + if args.output: + output_file.write(vis_frame) + else: + cv2.namedWindow(basename, cv2.WINDOW_NORMAL) + cv2.imshow(basename, vis_frame) + if cv2.waitKey(1) == 27: + break # esc to quit + video.release() + if args.output: + output_file.release() + else: + cv2.destroyAllWindows() diff --git a/src/sts/demo/genericmask.py b/src/sts/demo/genericmask.py new file mode 100644 index 0000000000000000000000000000000000000000..3ecc1fa5bcd7fa75dc09d00877f85005871e31b0 --- /dev/null +++ b/src/sts/demo/genericmask.py @@ -0,0 +1,96 @@ +import numpy as np +import pycocotools.mask as mask_util +import cv2 + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox diff --git a/src/sts/demo/merge.py b/src/sts/demo/merge.py new file mode 100644 index 0000000000000000000000000000000000000000..9cfe40577c040fe7683f564a3b03bd2d304c9f62 --- /dev/null +++ b/src/sts/demo/merge.py @@ -0,0 +1,572 @@ +# import sys +# sys.path.insert(0, '../Pipeline_POI_Engineering/src/sts') +# Copyright (c) Facebook, Inc. and its affiliates. 
+import argparse +import glob +import multiprocessing as mp +import os +import time +import cv2 +import tqdm +import torch +import numpy as np +import json +from shapely.geometry import Point, Polygon +from genericmask import GenericMask +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.utils.logger import setup_logger +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from predictor import VisualizationDemo +# from beamsearch import beam_search +from editdistance import eval +import operator +# constants +WINDOW_NAME = "COCO detections" + +dictionary = "aàáạảãâầấậẩẫăằắặẳẵAÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪeèéẹẻẽêềếệểễEÈÉẸẺẼÊỀẾỆỂỄoòóọỏõôồốộổỗơờớợởỡOÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠiìíịỉĩIÌÍỊỈĨuùúụủũưừứựửữƯỪỨỰỬỮUÙÚỤỦŨyỳýỵỷỹYỲÝỴỶỸ" + + +def make_groups(): + groups = [] + i = 0 + while i < len(dictionary) - 5: + group = [c for c in dictionary[i: i + 6]] + i += 6 + groups.append(group) + return groups + + +groups = make_groups() + +TONES = ["", "ˋ", "ˊ", "﹒", "ˀ", "˜"] +SOURCES = ["ă", "â", "Ă", "Â", "ê", "Ê", + "ô", "ơ", "Ô", "Ơ", "ư", "Ư", "Đ", "đ"] +TARGETS = ["aˇ", "aˆ", "Aˇ", "Aˆ", "eˆ", "Eˆ", + "oˆ", "o˒", "Oˆ", "O˒", "u˒", "U˒", "D‑", "d‑"] + + +def parse_tone(word): + res = "" + tone = "" + for char in word: + if char in dictionary: + for group in groups: + if char in group: + if tone == "": + tone = TONES[group.index(char)] + res += group[0] + else: + res += char + res += tone + return res + + +def full_parse(word): + word = parse_tone(word) + res = "" + for char in word: + if char in SOURCES: + res += TARGETS[SOURCES.index(char)] + else: + res += char + return res + + +def correct_tone_position(word): + word = word[:-1] + if len(word) < 2: + pass + first_ord_char = "" + second_order_char = "" + for char in word: + for group in groups: + if char in group: + second_order_char = first_ord_char + first_ord_char = group[0] + if word[-1] == first_ord_char and second_order_char != "": + pair_chars = ["qu", "Qu", "qU", "QU", "gi", "Gi", "gI", "GI"] + for pair in pair_chars: + if pair in word and second_order_char in ["u", "U", "i", "I"]: + return first_ord_char + return second_order_char + return first_ord_char + + +def decoder(recognition): + for char in TARGETS: + recognition = recognition.replace(char, SOURCES[TARGETS.index(char)]) + if len(recognition) < 1: + return recognition + if recognition[-1] in TONES: + if len(recognition) < 2: + return recognition + replace_char = correct_tone_position(recognition) + tone = recognition[-1] + recognition = recognition[:-1] + for group in groups: + if replace_char in group: + recognition = recognition.replace( + replace_char, group[TONES.index(tone)]) + return recognition + + +def decode_recognition(rec): + CTLABELS = [" ", "!", '"', "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", + "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_", "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "ˋ", "ˊ", "﹒", "ˀ", "˜", "ˇ", "ˆ", "˒", "‑", ] + last_char = False + s = '' + for c in rec: + c = int(c) + if 0 < c < 107: + s += CTLABELS[c-1] + last_char = c + elif c == 0: + s += u'' + else: + last_char = False + if len(s) == 0: + s = ' ' + s = decoder(s) + return s + + +def 
get_mini_boxes(contour, max_x, min_x, thr): + bounding_box = cv2.minAreaRect(contour) + # print('bbox', bounding_box) + bounding_box = list(bounding_box) + bounding_box[1] = list(bounding_box[1]) + if bounding_box[2] <= 45: + bounding_box[1][0] = bounding_box[1][0]*thr + else: + bounding_box[1][1] = bounding_box[1][1]*thr + bounding_box[1] = tuple(bounding_box[1]) + bounding_box = tuple(bounding_box) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + # p1 = np.array([min_x, points[index_1][1]]) + # p2 = np.array([max_x, points[index_2][1]]) + # p3 = np.array([max_x, points[index_3][1]]) + # p4 = np.array([min_x, points[index_4][1]]) + # box = [p1, p2, p3, p4] + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box + + +def get_mini_boxes_1(contour): + bounding_box = cv2.minAreaRect(contour) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box + + +def calculate_iou(box_1, box_2): + # print(box_1, box_2) + poly_1 = Polygon(box_1) + poly_2 = Polygon(box_2) + # print(poly_1.union(poly_2).area) + try: + iou = poly_1.intersection(poly_2).area / poly_1.union(poly_2).area + except: + iou = 0 + return iou + +# def get_key(val, my_dict): +# for key, value in my_dict.items(): +# if val in value: +# return key + + +def merge_boxes(boxes, recs, trh): + dict_bbox = {} + x = 0 + for i in range(len(boxes)-2): + tmp_box = [i] + db_copy1 = dict_bbox.copy() + for key, value in db_copy1.items(): + if i in value: + tmp_box = db_copy1[key] + del dict_bbox[key] + break + for j in range(i+1, len(boxes)-1): + ba = cv2.minAreaRect(boxes[i].reshape(-1, 1, 2).astype(int)) + bb = cv2.minAreaRect(boxes[j].reshape(-1, 1, 2).astype(int)) + iou = calculate_iou(boxes[i], boxes[j]) + # scr = min(ba[1][0], bb[1][0])/max(ba[1][0], bb[1][0]) + if iou > trh: + db_copy = dict_bbox.copy() + check = False + for key, value in db_copy.items(): + if i in value: + check = True + tmp_box.remove(i) + tmp_box.extend(db_copy[key]) + del dict_bbox[key] + break + if check == False: + tmp_box.append(j) + dict_bbox[x] = tmp_box + x += 1 + recs_out = [] + db_clone = {} + for key, value in dict_bbox.items(): + db_clone[key] = list(set(value)) + for key, value in db_clone.items(): + tmp_str = [] + for i in value: + tmp_str.append([recs[i], cv2.minAreaRect( + boxes[i].reshape(-1, 1, 2).astype(int))[0][0]]) + recs_out.append(tmp_str) + return db_clone, recs_out + + +def combine(dict_box, h, w, boxes): + bboxs = [] + for key, db in dict_box.items(): + list_box = [] + for j in db: + list_box.append(boxes[j]) + h1 = h + h2 = 0 + h3 = 0 + h4 = h + w1 = w + w2 = w + w3 = 0 + w4 = 0 + for box in list_box: + if box[0, 0] < h1: + h1 = box[0, 0] + if box[1, 0] > h2: + h2 = box[1, 0] + if box[2, 0] > h3: + h3 = box[2, 0] + if box[3, 0] < h4: + h4 = box[3, 0] + if box[0, 1] < w1: + w1 = box[0, 1] + if box[1, 1] < w2: + w2 = box[1, 1] + if box[2, 1] > w3: + w3 = box[2, 1] + if box[3, 1] > w4: + w4 
= box[3, 1] + tmp = np.array([[h1, w1], [h2, w2], [h3, w3], [h4, w4]]) + bboxs.append(tmp.astype(np.int16)) + return bboxs + + +def rec_to_str(recs): + rec_1 = [] + for rec in recs: + i = sorted(rec, key=lambda x: x[1]) + print(i) + i = " ".join(decoder(item[0]) for item in i) + rec_1.append(i) + return rec_1 + + +def scale_points(mask): + mask_tmp = mask.copy() + for i in range(2, len(mask_tmp)-2): + for j in range(2, len(mask_tmp[i])-2): + if mask_tmp[i][j] != 0: + mask[i-2][j-2] = mask[i-2][j-1] = mask[i-2][j] = mask[i-2][j+1] = mask[i-2][j+2] = mask[i-1][j-2] = mask[i-1][j-1] = mask[i-1][j] = mask[i-1][j+1] = mask[i-1][j+2] = mask[i][j-2] = mask[i][j-1] = mask[i][j + + 1] = mask[i][j+2] = mask[i+1][j-2] = mask[i+1][j-1] = mask[i+1][j] = mask[i+1][j+1] = mask[i+1][j+2] = mask[i+2][j-2] = mask[i+2][j-1] = mask[i+2][j] = mask[i+2][j+1] = mask[i+2][j+2] = mask_tmp[i][j] + return mask + + +def convert_boxes(boxes): + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + +def convert_masks(masks_or_polygons, h, w): + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, h, w)) + return ret + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + + from projects.SWINTS.swints import add_SWINTS_config + add_SWINTS_config(cfg) + # ----- + + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + # Set score_threshold for builtin models + cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold + cfg.freeze() + return cfg + + +def get_parser(): + parser = argparse.ArgumentParser( + description="Detectron2 demo for builtin configs") + parser.add_argument( + "--config-file", + default="src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-vintext.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--inputfile", nargs="+", + help="A list of array of segmentation", default=["output/ss"]) + parser.add_argument( + "--input", + nargs="+", + help="A list of space separated input images; " + "or a single glob pattern such as 'directory/*.jpg'", default=["images"] + ) + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. " + "If not given, will show output in an OpenCV window.", default="output/sts/" + ) + parser.add_argument( + "--output-visualize", + help="A file or directory to save output visualizations. 
" + "If not given, will show output in an OpenCV window.", default="output/visualize/" + ) + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.5, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +if __name__ == "__main__": + mp.set_start_method("spawn", force=True) + args = get_parser().parse_args() + setup_logger(name="fvcore") + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + demo = VisualizationDemo(cfg) + hh = [] + if args.inputfile: + path_segment = args.inputfile[0] + if args.input: + if os.path.isdir(args.input[0]): + args.input = [os.path.join(args.input[0], fname) + for fname in os.listdir(args.input[0])] + elif len(args.input) == 1: + args.input = glob.glob(os.path.expanduser(args.input[0])) + assert args.input, "The input path(s) was not found" + for path in tqdm.tqdm(args.input, disable=not args.output): + # use PIL, to be consistent with evaluation + print(path) + txt_name = str(path.split("/")[-1].split(".")[0]) + '.txt' + txt_file = os.path.join(path_segment, txt_name) + img = read_image(path, format="BGR") + h, w, _ = img.shape + start_time = time.time() + predictions, visualized_output = demo.run_on_image( + img, args.confidence_threshold, path) + # time_1 = time.time()-start_time + + mask = np.loadtxt(txt_file, dtype=np.int32) + # time_2 = time.time()-time_1 + mmax = np.amax(mask) + if mmax == 0: + mmax = 1 + mask = scale_points(mask) + # time_3 = time.time()-time_2 + + outs = cv2.findContours( + (mask * int(255/mmax)).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + box_sign = [] + for contour in contours: + points = get_mini_boxes_1(contour) + points = np.array(points) + box_sign.append(points) + # time_4 = time.time()-time_3 + dict_box_sign = {} + dict_box_sign_out = {} + dict_rec_sign = {} + dict_rec_sign_out = {} + in_signboard = 0 + # full_box = 0 + + for i in range(len(box_sign)): + dict_box_sign[i] = [] + dict_box_sign_out[i] = [] + dict_rec_sign[i] = [] + dict_rec_sign_out[i] = [] + list_limit = [] + for sig in box_sign: + # print(sig) + max_x = max(sig[0][0], sig[1][0], sig[2][0], sig[3][0]) + min_x = min(sig[0][0], sig[1][0], sig[2][0], sig[3][0]) + list_limit.append([max_x, min_x]) + if "instances" in predictions: + + beziers = [] + segments = [] + recc = [] + scoress = [] + instances = predictions["instances"].to(torch.device("cpu")) + # print("instance",type(instances)) + instances = instances[instances.scores > + args.confidence_threshold] + boxes = instances.pred_boxes if instances.has( + "pred_boxes") else None + scores = instances.scores if instances.has("scores") else None + # classes = instances.pred_classes if instances.has("pred_classes") else None + recs = instances.pred_rec if instances.has( + "pred_rec") else None + # rec_score = instances.pred_rec_score if instances.has("pred_rec_score") else None + + masks = np.asarray(instances.pred_masks) + masks = [GenericMask(x, h, w) for x in masks] + masks = convert_masks(masks, h, w) + polys = [] + for mask in masks: + polys.append(np.concatenate( + mask.polygons).reshape(-1, 2).tolist()) + + # text box into signboard box + for bezier, rec, score in zip(polys, recs, scores): + # print(bezier) + if score >= 0.5: + 
bezier = np.array( + bezier, dtype='int').reshape(-1, 1, 2) + bounding_box = cv2.minAreaRect(bezier) + midpoint = Point(bounding_box[0]) + for i in range(len(box_sign)): + poly = Polygon(box_sign[i]) + if midpoint.within(poly): + in_signboard += 1 + dict_box_sign[i].append(bezier) + dict_rec_sign[i].append( + full_parse(decode_recognition(rec))) + # time_5 = time.time()-time_4 + for i in range(len(dict_box_sign)): + boxes = [] + reces = [] + for bezier, rec in zip(dict_box_sign[i], dict_rec_sign[i]): + unclip_ratio = 1.0 + bezier = bezier.reshape(-1, 1, 2) + points = get_mini_boxes( + bezier, list_limit[i][0], list_limit[i][1], 3) + box = np.array(points, dtype=np.int16) + + box[:, 0] = np.clip(np.round(box[:, 0]), 0, w) + box[:, 1] = np.clip(np.round(box[:, 1]), 0, h) + + boxes.append(box.astype(np.int16)) + reces.append(rec) + + dict_box, rec_out = merge_boxes(boxes, reces, 0.1) + + rec_outs = rec_to_str(rec_out) + bboxs = combine(dict_box, h, w, boxes) + # print(rec_outs) + dict_box_sign_out[i] = bboxs + dict_rec_sign_out[i] = rec_outs + # time_6 = time.time()-time_5 + # Visualize image after merge boxes + img_draw = cv2.imread(path) + for i in range(len(dict_box_sign_out)): + for j in range(len(dict_box_sign_out[i])): + pts = dict_box_sign_out[i][j] + x, y = pts[0][0], pts[0][1] + pts = np.array(pts, np.int32).reshape((-1, 1, 2)) + isClosed = True + color = (255, 0, 0) + thickness = 2 + img_draw = cv2.polylines( + img_draw, [pts], isClosed, color, thickness) + cv2.putText(img_draw, dict_rec_sign_out[i][j], ( + x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2) + # print(time_1, time_2, time_3, time_4, time_5, time_6) + txt_name1 = str(path.split("/")[-1].split(".")[0]) + '_box.txt' + txt_name2 = str(path.split("/")[-1].split(".")[0]) + '_text.txt' + img_name = str(path.split("/")[-1].split(".")[0]) + '.jpg' + if args.output: + output_path_box = os.path.join(args.output, txt_name1) + output_path_text = os.path.join(args.output, txt_name2) + with open(output_path_box, 'w+', encoding='utf-8') as output_file_box: + for index, box in dict_box_sign_out.items(): + arr = [] + for box_ in box: + arr.append(box_.tolist()) + dict_box_sign_out[index] = arr + dict_box_sign_out["file_name"] = str(path.split("/")[-1]) + json.dump(dict_box_sign_out, output_file_box, + ensure_ascii=False) + with open(output_path_text, 'w+', encoding='utf-8') as output_file_box: + dict_rec_sign_out["file_name"] = str(path.split("/")[-1]) + json.dump(dict_rec_sign_out, output_file_box, + ensure_ascii=False) + if args.output_visualize: + cv2.imwrite(os.path.join( + args.output_visualize, img_name), img_draw) + logger.info( + "{}: detected {} instances in {:.2f}s".format( + path, len(predictions["instances"] + ), time.time() - start_time + ) + ) diff --git a/src/sts/demo/predictor.py b/src/sts/demo/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..0ba47761e00fec3ee57c665e1b95b6fb892047c5 --- /dev/null +++ b/src/sts/demo/predictor.py @@ -0,0 +1,224 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +import atexit +import bisect +import multiprocessing as mp +from collections import deque +import cv2 +import torch + +from detectron2.data import MetadataCatalog +from detectron2.engine.defaults import DefaultPredictor +from detectron2.utils.video_visualizer import VideoVisualizer +from detectron2.utils.visualizer import ColorMode, Visualizer +from src.sts.detectron2.utils.visualizer_chn import Visualizer as Visualizer_chn +from src.sts.detectron2.utils.visualizer_vintext import Visualizer as Visualizer_vintext + +class VisualizationDemo(object): + def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): + """ + Args: + cfg (CfgNode): + instance_mode (ColorMode): + parallel (bool): whether to run the model in different processes from visualization. + Useful since the visualization logic can be slow. + """ + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + if parallel: + num_gpu = torch.cuda.device_count() + self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) + else: + self.predictor = DefaultPredictor(cfg) + + def run_on_image(self, image, confidence_threshold, path): + """ + Args: + image (np.ndarray): an image of shape (H, W, C) (in BGR order). + This is the format used by OpenCV. + + Returns: + predictions (dict): the output of the model. + vis_output (VisImage): the visualized image output. + """ + vis_output = None + predictions = self.predictor(image) + # Convert image from OpenCV BGR format to Matplotlib RGB format. + image = image[:, :, ::-1] + visualizer = Visualizer_vintext(image, self.metadata, instance_mode=self.instance_mode) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_output = visualizer.draw_panoptic_seg_predictions( + panoptic_seg.to(self.cpu_device), segments_info + ) + else: + if "sem_seg" in predictions: + vis_output = visualizer.draw_sem_seg( + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + if "instances" in predictions: + instances = predictions["instances"].to(self.cpu_device) + instances = instances[instances.scores > confidence_threshold] + predictions["instances"] = instances + vis_output = visualizer.draw_instance_predictions(predictions=instances, path=path) + + return predictions, vis_output + + def _frame_from_video(self, video): + while video.isOpened(): + success, frame = video.read() + if success: + yield frame + else: + break + + def run_on_video(self, video, confidence_threshold): + """ + Visualizes predictions on frames of the input video. + + Args: + video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be + either a webcam or a video file. + + Yields: + ndarray: BGR visualizations of each video frame. 
+ """ + video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) + + def process_predictions(frame, predictions): + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_frame = video_visualizer.draw_panoptic_seg_predictions( + frame, panoptic_seg.to(self.cpu_device), segments_info + ) + elif "instances" in predictions: + predictions = predictions["instances"].to(self.cpu_device) + predictions = predictions[predictions.scores > confidence_threshold] + vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) + elif "sem_seg" in predictions: + vis_frame = video_visualizer.draw_sem_seg( + frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + + # Converts Matplotlib RGB format to OpenCV BGR format + vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) + return vis_frame + + frame_gen = self._frame_from_video(video) + if self.parallel: + buffer_size = self.predictor.default_buffer_size + + frame_data = deque() + + for cnt, frame in enumerate(frame_gen): + frame_data.append(frame) + self.predictor.put(frame) + + if cnt >= buffer_size: + frame = frame_data.popleft() + predictions = self.predictor.get() + yield process_predictions(frame, predictions) + + while len(frame_data): + frame = frame_data.popleft() + predictions = self.predictor.get() + yield process_predictions(frame, predictions) + else: + for frame in frame_gen: + yield process_predictions(frame, self.predictor(frame)) + + +class AsyncPredictor: + """ + A predictor that runs the model asynchronously, possibly on >1 GPUs. + Because rendering the visualization takes considerably amount of time, + this helps improve throughput a little bit when rendering videos. 
+ """ + + class _StopToken: + pass + + class _PredictWorker(mp.Process): + def __init__(self, cfg, task_queue, result_queue): + self.cfg = cfg + self.task_queue = task_queue + self.result_queue = result_queue + super().__init__() + + def run(self): + predictor = DefaultPredictor(self.cfg) + + while True: + task = self.task_queue.get() + if isinstance(task, AsyncPredictor._StopToken): + break + idx, data = task + result = predictor(data) + self.result_queue.put((idx, result)) + + def __init__(self, cfg, num_gpus: int = 1): + """ + Args: + cfg (CfgNode): + num_gpus (int): if 0, will run on CPU + """ + num_workers = max(num_gpus, 1) + self.task_queue = mp.Queue(maxsize=num_workers * 3) + self.result_queue = mp.Queue(maxsize=num_workers * 3) + self.procs = [] + for gpuid in range(max(num_gpus, 1)): + cfg = cfg.clone() + cfg.defrost() + cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" + self.procs.append( + AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) + ) + + self.put_idx = 0 + self.get_idx = 0 + self.result_rank = [] + self.result_data = [] + + for p in self.procs: + p.start() + atexit.register(self.shutdown) + + def put(self, image): + self.put_idx += 1 + self.task_queue.put((self.put_idx, image)) + + def get(self): + self.get_idx += 1 # the index needed for this request + if len(self.result_rank) and self.result_rank[0] == self.get_idx: + res = self.result_data[0] + del self.result_data[0], self.result_rank[0] + return res + + while True: + # make sure the results are returned in the correct order + idx, res = self.result_queue.get() + if idx == self.get_idx: + return res + insert = bisect.bisect(self.result_rank, idx) + self.result_rank.insert(insert, idx) + self.result_data.insert(insert, res) + + def __len__(self): + return self.put_idx - self.get_idx + + def __call__(self, image): + self.put(image) + return self.get() + + def shutdown(self): + for _ in self.procs: + self.task_queue.put(AsyncPredictor._StopToken()) + + @property + def default_buffer_size(self): + return len(self.procs) * 5 diff --git a/src/sts/demo/sts.py b/src/sts/demo/sts.py new file mode 100644 index 0000000000000000000000000000000000000000..f30ef678f3468caadb4ad3997b9c225013ae082c --- /dev/null +++ b/src/sts/demo/sts.py @@ -0,0 +1,517 @@ +import multiprocessing as mp +import os +import time +import cv2 +import torch +import numpy as np +import json +import csv +from shapely.geometry import Point, Polygon +from src.sts.demo.genericmask import GenericMask +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.structures import BitMasks, Boxes, PolygonMasks, RotatedBoxes +from src.sts.demo.predictor import VisualizationDemo +# constants +WINDOW_NAME = "COCO detections" + +dictionary = "aàáạảãâầấậẩẫăằắặẳẵAÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪeèéẹẻẽêềếệểễEÈÉẸẺẼÊỀẾỆỂỄoòóọỏõôồốộổỗơờớợởỡOÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠiìíịỉĩIÌÍỊỈĨuùúụủũưừứựửữƯỪỨỰỬỮUÙÚỤỦŨyỳýỵỷỹYỲÝỴỶỸ" + + +def make_groups(): + groups = [] + i = 0 + while i < len(dictionary) - 5: + group = [c for c in dictionary[i: i + 6]] + i += 6 + groups.append(group) + return groups + + +groups = make_groups() + +TONES = ["", "ˋ", "ˊ", "﹒", "ˀ", "˜"] +SOURCES = ["ă", "â", "Ă", "Â", "ê", "Ê", + "ô", "ơ", "Ô", "Ơ", "ư", "Ư", "Đ", "đ"] +TARGETS = ["aˇ", "aˆ", "Aˇ", "Aˆ", "eˆ", "Eˆ", + "oˆ", "o˒", "Oˆ", "O˒", "u˒", "U˒", "D‑", "d‑"] + + +def parse_tone(word): + res = "" + tone = "" + for char in word: + if char in dictionary: + for group in groups: + if char in group: + if tone == "": + tone = 
TONES[group.index(char)] + res += group[0] + else: + res += char + res += tone + return res + + +def full_parse(word): + word = parse_tone(word) + res = "" + for char in word: + if char in SOURCES: + res += TARGETS[SOURCES.index(char)] + else: + res += char + return res + + +def correct_tone_position(word): + word = word[:-1] + if len(word) < 2: + pass + first_ord_char = "" + second_order_char = "" + for char in word: + for group in groups: + if char in group: + second_order_char = first_ord_char + first_ord_char = group[0] + if word[-1] == first_ord_char and second_order_char != "": + pair_chars = ["qu", "Qu", "qU", "QU", "gi", "Gi", "gI", "GI"] + for pair in pair_chars: + if pair in word and second_order_char in ["u", "U", "i", "I"]: + return first_ord_char + return second_order_char + return first_ord_char + + +def decoder(recognition): + for char in TARGETS: + recognition = recognition.replace(char, SOURCES[TARGETS.index(char)]) + if len(recognition) < 1: + return recognition + if recognition[-1] in TONES: + if len(recognition) < 2: + return recognition + replace_char = correct_tone_position(recognition) + tone = recognition[-1] + recognition = recognition[:-1] + for group in groups: + if replace_char in group: + recognition = recognition.replace( + replace_char, group[TONES.index(tone)]) + return recognition + + +def decode_recognition(rec): + CTLABELS = [" ", "!", '"', "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", + "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_", "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "ˋ", "ˊ", "﹒", "ˀ", "˜", "ˇ", "ˆ", "˒", "‑", ] + last_char = False + s = '' + for c in rec: + c = int(c) + if 0 < c < 107: + s += CTLABELS[c-1] + last_char = c + elif c == 0: + s += u'' + else: + last_char = False + if len(s) == 0: + s = ' ' + s = decoder(s) + return s + + +def get_mini_boxes(contour, max_x, min_x, thr): + bounding_box = cv2.minAreaRect(contour) + # print('bbox', bounding_box) + bounding_box = list(bounding_box) + bounding_box[1] = list(bounding_box[1]) + if bounding_box[2] <= 45: + bounding_box[1][0] = bounding_box[1][0]*thr + else: + bounding_box[1][1] = bounding_box[1][1]*thr + bounding_box[1] = tuple(bounding_box[1]) + bounding_box = tuple(bounding_box) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + # p1 = np.array([min_x, points[index_1][1]]) + # p2 = np.array([max_x, points[index_2][1]]) + # p3 = np.array([max_x, points[index_3][1]]) + # p4 = np.array([min_x, points[index_4][1]]) + # box = [p1, p2, p3, p4] + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box + + +def get_mini_boxes_1(contour): + bounding_box = cv2.minAreaRect(contour) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 
3 + index_3 = 2 + + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box + + +def calculate_iou(box_1, box_2): + # print(box_1, box_2) + poly_1 = Polygon(box_1) + poly_2 = Polygon(box_2) + # print(poly_1.union(poly_2).area) + try: + iou = poly_1.intersection(poly_2).area / poly_1.union(poly_2).area + except: + iou = 0 + return iou + + +def merge_boxes(boxes, recs, trh): + dict_bbox = {} + x = 0 + for i in range(len(boxes)-2): + tmp_box = [i] + db_copy1 = dict_bbox.copy() + for key, value in db_copy1.items(): + if i in value: + tmp_box = db_copy1[key] + del dict_bbox[key] + break + for j in range(i+1, len(boxes)-1): + ba = cv2.minAreaRect(boxes[i].reshape(-1, 1, 2).astype(int)) + bb = cv2.minAreaRect(boxes[j].reshape(-1, 1, 2).astype(int)) + iou = calculate_iou(boxes[i], boxes[j]) + # scr = min(ba[1][0], bb[1][0])/max(ba[1][0], bb[1][0]) + if iou > trh: + db_copy = dict_bbox.copy() + check = False + for key, value in db_copy.items(): + if i in value: + check = True + tmp_box.remove(i) + tmp_box.extend(db_copy[key]) + del dict_bbox[key] + break + if check == False: + tmp_box.append(j) + dict_bbox[x] = tmp_box + x += 1 + recs_out = [] + db_clone = {} + for key, value in dict_bbox.items(): + db_clone[key] = list(set(value)) + for key, value in db_clone.items(): + tmp_str = [] + for i in value: + tmp_str.append([recs[i], cv2.minAreaRect( + boxes[i].reshape(-1, 1, 2).astype(int))[0][0]]) + recs_out.append(tmp_str) + return db_clone, recs_out + + +def combine(dict_box, h, w, boxes): + bboxs = [] + for key, db in dict_box.items(): + list_box = [] + for j in db: + list_box.append(boxes[j]) + h1 = h + h2 = 0 + h3 = 0 + h4 = h + w1 = w + w2 = w + w3 = 0 + w4 = 0 + for box in list_box: + if box[0, 0] < h1: + h1 = box[0, 0] + if box[1, 0] > h2: + h2 = box[1, 0] + if box[2, 0] > h3: + h3 = box[2, 0] + if box[3, 0] < h4: + h4 = box[3, 0] + if box[0, 1] < w1: + w1 = box[0, 1] + if box[1, 1] < w2: + w2 = box[1, 1] + if box[2, 1] > w3: + w3 = box[2, 1] + if box[3, 1] > w4: + w4 = box[3, 1] + tmp = np.array([[h1, w1], [h2, w2], [h3, w3], [h4, w4]]) + bboxs.append(tmp.astype(np.int16)) + return bboxs + + +def rec_to_str(recs): + rec_1 = [] + for rec in recs: + i = sorted(rec, key=lambda x: x[1]) + i = " ".join(decoder(item[0]) for item in i) + rec_1.append(i) + return rec_1 + + +def scale_points(mask): + mask_tmp = mask.copy() + for i in range(2, len(mask_tmp)-2): + for j in range(2, len(mask_tmp[i])-2): + if mask_tmp[i][j] != 0: + mask[i-2][j-2] = mask[i-2][j-1] = mask[i-2][j] = mask[i-2][j+1] = mask[i-2][j+2] = mask[i-1][j-2] = mask[i-1][j-1] = mask[i-1][j] = mask[i-1][j+1] = mask[i-1][j+2] = mask[i][j-2] = mask[i][j-1] = mask[i][j + + 1] = mask[i][j+2] = mask[i+1][j-2] = mask[i+1][j-1] = mask[i+1][j] = mask[i+1][j+1] = mask[i+1][j+2] = mask[i+2][j-2] = mask[i+2][j-1] = mask[i+2][j] = mask[i+2][j+1] = mask[i+2][j+2] = mask_tmp[i][j] + return mask + + +def convert_boxes(boxes): + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + +def convert_masks(masks_or_polygons, h, w): + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, h, w)) + return ret + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + + from 
projects.SWINTS.swints import add_SWINTS_config + add_SWINTS_config(cfg) + # ----- + + cfg.merge_from_file(args["config_file"]) + cfg.merge_from_list(args["opts"]) + # Set score_threshold for builtin models + cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args["confidence_threshold"] + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args["confidence_threshold"] + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args["confidence_threshold"] + cfg.freeze() + return cfg + + +def handle_sts(image, output, seg_boxes, seg_ids): + args_ = { + "config_file": "src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-vintext.yaml", + "confidence_threshold": 0.5, + "opts": ["MODEL.WEIGHTS", "./checkpoints/sts/sts.pth"] + } + mp.set_start_method("spawn", force=True) + + cfg = setup_cfg(args_) + + demo = VisualizationDemo(cfg) + # if segment: + # path_segment = segment + box_sign = [] + if seg_boxes: + for seg_box in seg_boxes: + x1, y1, x2, y2 = seg_box + x1 = np.int(x1) + y1 = np.int(y1) + x2 = np.int(x2) + y2 = np.int(y2) + box_sign.append(np.array([[x1,y1], [x2, y1], [x2, y2], [x1, y2]])) + # print(box_sign) + if image: + # use PIL, to be consistent with evaluation + img = read_image(image, format="BGR") + h, w, _ = img.shape + # start_time = time.time() + predictions, visualized_output = demo.run_on_image( + img, args_["confidence_threshold"], image) + # time_1 = time.time()-start_time + + # mask = np.loadtxt(path_segment, dtype=np.int32) + # time_2 = time.time()-time_1 + # mmax = np.amax(mask) + # if mmax == 0: + # mmax = 1 + # mask = scale_points(mask) + # time_3 = time.time()-time_2 + + # outs = cv2.findContours( + # (mask * int(255/mmax)).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + # if len(outs) == 3: + # img, contours, _ = outs[0], outs[1], outs[2] + # elif len(outs) == 2: + # contours, _ = outs[0], outs[1] + + # for contour in contours: + # points = get_mini_boxes_1(contour) + # points = np.array(points) + # box_sign.append(points) + # time_4 = time.time()-time_3 + dict_box_sign = {} + dict_box_sign_out = {} + dict_rec_sign = {} + dict_rec_sign_out = {} + in_signboard = 0 + # full_box = 0 + + for i in range(len(box_sign)): + dict_box_sign[seg_ids[i]] = [] + dict_box_sign_out[seg_ids[i]] = [] + dict_rec_sign[seg_ids[i]] = [] + dict_rec_sign_out[seg_ids[i]] = [] + list_limit = [] + print(dict_rec_sign_out) + try: + for sig in box_sign: + # print(sig) + max_x = max(sig[0][0], sig[1][0], sig[2][0], sig[3][0]) + min_x = min(sig[0][0], sig[1][0], sig[2][0], sig[3][0]) + list_limit.append([max_x, min_x]) + if "instances" in predictions: + instances = predictions["instances"].to(torch.device("cpu")) + # print("instance",type(instances)) + instances = instances[instances.scores > + args_["confidence_threshold"]] + boxes = instances.pred_boxes if instances.has( + "pred_boxes") else None + scores = instances.scores if instances.has("scores") else None + # classes = instances.pred_classes if instances.has("pred_classes") else None + recs = instances.pred_rec if instances.has( + "pred_rec") else None + # rec_score = instances.pred_rec_score if instances.has("pred_rec_score") else None + + masks = np.asarray(instances.pred_masks) + masks = [GenericMask(x, h, w) for x in masks] + masks = convert_masks(masks, h, w) + polys = [] + for mask in masks: + polys.append(np.concatenate( + mask.polygons).reshape(-1, 2).tolist()) + + # text box into signboard box + for bezier, rec, score in zip(polys, recs, scores): + # print(bezier) + if score >= 0.5: + bezier = np.array( + bezier, dtype='int').reshape(-1, 
1, 2) + bounding_box = cv2.minAreaRect(bezier) + midpoint = Point(bounding_box[0]) + for i in range(len(box_sign)): + poly = Polygon(box_sign[i]) + if midpoint.within(poly): + in_signboard += 1 + dict_box_sign[seg_ids[i]].append(bezier) + dict_rec_sign[seg_ids[i]].append( + full_parse(decode_recognition(rec))) + # time_5 = time.time()-time_4 + for i in range(len(dict_box_sign)): + boxes = [] + reces = [] + for bezier, rec in zip(dict_box_sign[seg_ids[i]], dict_rec_sign[seg_ids[i]]): + unclip_ratio = 1.0 + bezier = bezier.reshape(-1, 1, 2) + points = get_mini_boxes( + bezier, list_limit[i][0], list_limit[i][1], 3) + box = np.array(points, dtype=np.int16) + + box[:, 0] = np.clip(np.round(box[:, 0]), 0, w) + box[:, 1] = np.clip(np.round(box[:, 1]), 0, h) + + boxes.append(box.astype(np.int16)) + reces.append(rec) + + dict_box, rec_out = merge_boxes(boxes, reces, 0.1) + + rec_outs = rec_to_str(rec_out) + bboxs = combine(dict_box, h, w, boxes) + # print(rec_outs) + dict_box_sign_out[seg_ids[i]] = bboxs + dict_rec_sign_out[seg_ids[i]] = rec_outs + # time_6 = time.time()-time_5 + # Visualize image after merge boxes + img_draw = cv2.imread(image) + # print(dict_box_sign_out) + # for key, values in dict_box_sign_out: + # for value in values: + # pts = value + # x, y = pts[0][0], pts[0][1] + # pts = np.array(pts, np.int32).reshape((-1, 1, 2)) + # isClosed = True + # color = (255, 0, 0) + # thickness = 2 + # img_draw = cv2.polylines( + # img_draw, [pts], isClosed, color, thickness) + # cv2.putText(img_draw, , ( + # x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2) + # print(time_1, time_2, time_3, time_4, time_5, time_6) + + if output: + tags = ["Tag", "Signboard", "Frame", "Name", "Number", "Street", "Ward", "City_District", "City"] + name = os.path.basename(image).split(".")[0] + for key, values in dict_rec_sign_out.items(): + labels = ["Value", key, name, values, "", "", "", "", ""] + if os.path.isfile(os.path.join(output, str(key) + "_" + str(name) + '.csv')): + os.remove(os.path.join(output, str(key) + "_" + str(name) + '.csv')) + with open(os.path.join(output, str(key) + "_" + str(name) + '.csv'), "a", encoding='utf-8') as f: + writer = csv.writer(f) + for i in range(len(tags)): + writer.writerow([tags[i], labels[i]]) + # txt_name1 = str(image.split("/")[-1].split(".")[0]) + '_box.txt' + # txt_name2 = str(image.split("/")[-1].split(".")[0]) + '_text.txt' + # img_name = str(image.split("/")[-1].split(".")[0]) + '.jpg' + # output_path_box = os.path.join(output, txt_name1) + # output_path_text = os.path.join(output, txt_name2) + # output_path_image = os.path.join(output, img_name) + # with open(output_path_box, 'w+', encoding='utf-8') as output_file_box: + # for index, box in dict_box_sign_out.items(): + # arr = [] + # for box_ in box: + # arr.append(box_.tolist()) + # dict_box_sign_out[index] = arr + # json.dump(dict_box_sign_out, output_file_box, + # ensure_ascii=False) + # with open(output_path_text, 'w+', encoding='utf-8') as output_file_box: + # json.dump(dict_rec_sign_out, output_file_box, + # ensure_ascii=False) + # cv2.imwrite(output_path_image, img_draw) + except: + pass + return dict_box_sign_out, dict_rec_sign_out \ No newline at end of file diff --git a/src/sts/detectron2.egg-info/PKG-INFO b/src/sts/detectron2.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..76509a7696b6c1c65ca320ed0843e83a4d2ecfbc --- /dev/null +++ b/src/sts/detectron2.egg-info/PKG-INFO @@ -0,0 +1,9 @@ +Metadata-Version: 2.1 +Name: detectron2 +Version: 0.4 +Summary: 
Detectron2 is FAIR's next-generation research platform for object detection and segmentation. +Home-page: https://github.com/facebookresearch/detectron2 +Author: FAIR +Requires-Python: >=3.6 +Provides-Extra: all +Provides-Extra: dev diff --git a/src/sts/detectron2.egg-info/SOURCES.txt b/src/sts/detectron2.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..653fcd5cf68045362303d4c89b374c7fdd9ca062 --- /dev/null +++ b/src/sts/detectron2.egg-info/SOURCES.txt @@ -0,0 +1,281 @@ +README.md +setup.cfg +setup.py +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cuda_version.cu +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/vision.cpp +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.cpp +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.cu +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp +/home/kienvs/sources/poi_engineering_api/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu +detectron2/__init__.py +detectron2.egg-info/PKG-INFO +detectron2.egg-info/SOURCES.txt +detectron2.egg-info/dependency_links.txt +detectron2.egg-info/requires.txt +detectron2.egg-info/top_level.txt +detectron2/checkpoint/__init__.py +detectron2/checkpoint/c2_model_loading.py +detectron2/checkpoint/catalog.py +detectron2/checkpoint/detection_checkpoint.py +detectron2/config/__init__.py +detectron2/config/compat.py +detectron2/config/config.py +detectron2/config/defaults.py +detectron2/config/instantiate.py +detectron2/data/__init__.py +detectron2/data/build.py +detectron2/data/catalog.py +detectron2/data/common.py +detectron2/data/dataset_mapper.py +detectron2/data/detection_utils.py +detectron2/data/datasets/__init__.py +detectron2/data/datasets/builtin.py +detectron2/data/datasets/builtin_meta.py +detectron2/data/datasets/cityscapes.py +detectron2/data/datasets/cityscapes_panoptic.py +detectron2/data/datasets/coco.py +detectron2/data/datasets/coco_panoptic.py +detectron2/data/datasets/lvis.py +detectron2/data/datasets/lvis_v0_5_categories.py +detectron2/data/datasets/lvis_v1_categories.py +detectron2/data/datasets/pascal_voc.py +detectron2/data/datasets/register_coco.py +detectron2/data/samplers/__init__.py +detectron2/data/samplers/distributed_sampler.py +detectron2/data/samplers/grouped_batch_sampler.py +detectron2/data/transforms/__init__.py +detectron2/data/transforms/augmentation.py +detectron2/data/transforms/augmentation_impl.py +detectron2/data/transforms/transform.py +detectron2/engine/__init__.py +detectron2/engine/defaults.py +detectron2/engine/hooks.py +detectron2/engine/launch.py +detectron2/engine/train_loop.py +detectron2/evaluation/__init__.py +detectron2/evaluation/cityscapes_evaluation.py +detectron2/evaluation/coco_evaluation.py +detectron2/evaluation/evaluator.py +detectron2/evaluation/fast_eval_api.py 
+detectron2/evaluation/lvis_evaluation.py +detectron2/evaluation/panoptic_evaluation.py +detectron2/evaluation/pascal_voc_evaluation.py +detectron2/evaluation/rotated_coco_evaluation.py +detectron2/evaluation/rrc_evaluation_funcs.py +detectron2/evaluation/rrc_evaluation_funcs_ic15.py +detectron2/evaluation/sem_seg_evaluation.py +detectron2/evaluation/testing.py +detectron2/evaluation/text_eval_script.py +detectron2/evaluation/text_eval_script_ic15.py +detectron2/evaluation/text_evaluation.py +detectron2/export/__init__.py +detectron2/export/api.py +detectron2/export/c10.py +detectron2/export/caffe2_export.py +detectron2/export/caffe2_inference.py +detectron2/export/caffe2_modeling.py +detectron2/export/caffe2_patch.py +detectron2/export/flatten.py +detectron2/export/shared.py +detectron2/export/torchscript.py +detectron2/export/torchscript_patch.py +detectron2/layers/__init__.py +detectron2/layers/aspp.py +detectron2/layers/batch_norm.py +detectron2/layers/blocks.py +detectron2/layers/deform_conv.py +detectron2/layers/mask_ops.py +detectron2/layers/nms.py +detectron2/layers/roi_align.py +detectron2/layers/roi_align_rotated.py +detectron2/layers/rotated_boxes.py +detectron2/layers/shape_spec.py +detectron2/layers/wrappers.py +detectron2/model_zoo/__init__.py +detectron2/model_zoo/model_zoo.py +detectron2/model_zoo/configs/Base-RCNN-C4.yaml +detectron2/model_zoo/configs/Base-RCNN-DilatedC5.yaml +detectron2/model_zoo/configs/Base-RCNN-FPN.yaml +detectron2/model_zoo/configs/Base-RetinaNet.yaml +detectron2/model_zoo/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_C4_1x.yaml +detectron2/model_zoo/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml +detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml 
+detectron2/model_zoo/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml +detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml +detectron2/model_zoo/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml +detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml +detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml +detectron2/model_zoo/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml +detectron2/model_zoo/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml +detectron2/model_zoo/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml +detectron2/model_zoo/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml +detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml +detectron2/model_zoo/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml +detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml +detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml +detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml +detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml +detectron2/model_zoo/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml +detectron2/model_zoo/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml +detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml +detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml +detectron2/model_zoo/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml +detectron2/model_zoo/configs/Misc/semantic_R_50_FPN_1x.yaml +detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml +detectron2/model_zoo/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml +detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml 
+detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml +detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml +detectron2/model_zoo/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml +detectron2/modeling/__init__.py +detectron2/modeling/anchor_generator.py +detectron2/modeling/box_regression.py +detectron2/modeling/matcher.py +detectron2/modeling/mmdet_wrapper.py +detectron2/modeling/poolers.py +detectron2/modeling/postprocessing.py +detectron2/modeling/sampling.py +detectron2/modeling/test_time_augmentation.py +detectron2/modeling/backbone/__init__.py +detectron2/modeling/backbone/backbone.py +detectron2/modeling/backbone/build.py +detectron2/modeling/backbone/fpn.py +detectron2/modeling/backbone/fpn_swin.py +detectron2/modeling/backbone/resnet.py +detectron2/modeling/backbone/swin_transformer.py +detectron2/modeling/meta_arch/__init__.py +detectron2/modeling/meta_arch/build.py +detectron2/modeling/meta_arch/panoptic_fpn.py +detectron2/modeling/meta_arch/rcnn.py +detectron2/modeling/meta_arch/retinanet.py +detectron2/modeling/meta_arch/semantic_seg.py +detectron2/modeling/proposal_generator/__init__.py +detectron2/modeling/proposal_generator/build.py +detectron2/modeling/proposal_generator/proposal_utils.py +detectron2/modeling/proposal_generator/rpn.py +detectron2/modeling/proposal_generator/rrpn.py +detectron2/modeling/roi_heads/__init__.py +detectron2/modeling/roi_heads/box_head.py +detectron2/modeling/roi_heads/cascade_rcnn.py +detectron2/modeling/roi_heads/fast_rcnn.py +detectron2/modeling/roi_heads/keypoint_head.py +detectron2/modeling/roi_heads/mask_head.py +detectron2/modeling/roi_heads/roi_heads.py +detectron2/modeling/roi_heads/rotated_fast_rcnn.py +detectron2/projects/__init__.py +detectron2/solver/__init__.py +detectron2/solver/build.py +detectron2/solver/lr_scheduler.py +detectron2/structures/__init__.py +detectron2/structures/boxes.py +detectron2/structures/image_list.py +detectron2/structures/instances.py +detectron2/structures/keypoints.py +detectron2/structures/masks.py +detectron2/structures/rotated_boxes.py +detectron2/utils/__init__.py 
+detectron2/utils/analysis.py +detectron2/utils/collect_env.py +detectron2/utils/colormap.py +detectron2/utils/comm.py +detectron2/utils/env.py +detectron2/utils/events.py +detectron2/utils/file_io.py +detectron2/utils/logger.py +detectron2/utils/memory.py +detectron2/utils/registry.py +detectron2/utils/serialize.py +detectron2/utils/testing.py +detectron2/utils/video_visualizer.py +detectron2/utils/visualizer.py +detectron2/utils/visualizer_chn.py +detectron2/utils/visualizer_vintext.py +projects/SWINTS/swints/FocalTransformer.py +projects/SWINTS/swints/MaskEncoding.py +projects/SWINTS/swints/__init__.py +projects/SWINTS/swints/beam_search.py +projects/SWINTS/swints/config.py +projects/SWINTS/swints/dataset_mapper.py +projects/SWINTS/swints/head.py +projects/SWINTS/swints/loss.py +projects/SWINTS/swints/rec_stage.py +projects/SWINTS/swints/roi_seq_predictors.py +projects/SWINTS/swints/swints.py +projects/SWINTS/swints/topk.py +projects/SWINTS/swints/transformer.py +tests/test_checkpoint.py +tests/test_engine.py +tests/test_events.py +tests/test_export_caffe2.py +tests/test_export_torchscript.py +tests/test_instantiate_config.py +tests/test_model_analysis.py +tests/test_model_zoo.py +tests/test_packaging.py +tests/test_registry.py +tests/test_scheduler.py +tests/test_visualizer.py +tests/test_yacs_config.py \ No newline at end of file diff --git a/src/sts/detectron2.egg-info/dependency_links.txt b/src/sts/detectron2.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/src/sts/detectron2.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/sts/detectron2.egg-info/requires.txt b/src/sts/detectron2.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9373c41e7d344d0e0b27f04dce6cec17fa95dc2 --- /dev/null +++ b/src/sts/detectron2.egg-info/requires.txt @@ -0,0 +1,30 @@ +termcolor>=1.1 +Pillow>=7.1 +yacs>=0.1.6 +tabulate +cloudpickle +matplotlib +tqdm>4.29.0 +tensorboard +fvcore<0.1.6,>=0.1.5 +iopath<0.1.8,>=0.1.7 +pycocotools>=2.0.2 +future +pydot +omegaconf==2.3.0 + +[:python_version < "3.7"] +dataclasses + +[all] +shapely +psutil +hydra-core +panopticapi@ https://github.com/cocodataset/panopticapi/archive/master.zip + +[dev] +flake8==3.8.1 +isort==4.3.21 +black==20.8b1 +flake8-bugbear +flake8-comprehensions diff --git a/src/sts/detectron2.egg-info/top_level.txt b/src/sts/detectron2.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..42f8e135f1e792391493b6c9072292eeb0e91010 --- /dev/null +++ b/src/sts/detectron2.egg-info/top_level.txt @@ -0,0 +1 @@ +detectron2 diff --git a/src/sts/detectron2/_C.cpython-38-x86_64-linux-gnu.so b/src/sts/detectron2/_C.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..f91e4840eab0cd41f653dfa983933ea0c8fd12f1 --- /dev/null +++ b/src/sts/detectron2/_C.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8443dc289ba64e8e997f7ce7b88e54af11e54f8b9022c73b9cb76f78390cbd1 +size 21233064 diff --git a/src/sts/detectron2/__init__.py b/src/sts/detectron2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a951838f58f8bcf4b2b51a94b2ba31c53e8fe1af --- /dev/null +++ b/src/sts/detectron2/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+ +from .utils.env import setup_environment + +setup_environment() + + +# This line will be programatically read/write by setup.py. +# Leave them at the bottom of this file and don't touch them. +__version__ = "0.4" diff --git a/src/sts/detectron2/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91c8773087d6a1e5ce4efe440e23829239724002 Binary files /dev/null and b/src/sts/detectron2/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/checkpoint/__init__.py b/src/sts/detectron2/checkpoint/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..99da0469ae7e169d8970e4b642fed3f870076860 --- /dev/null +++ b/src/sts/detectron2/checkpoint/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +# File: + + +from . import catalog as _UNUSED # register the handler +from .detection_checkpoint import DetectionCheckpointer +from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer + +__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] diff --git a/src/sts/detectron2/checkpoint/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/checkpoint/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2eaa6df812fbb13ca2d98dc8ec561c7b6f453966 Binary files /dev/null and b/src/sts/detectron2/checkpoint/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/checkpoint/__pycache__/c2_model_loading.cpython-38.pyc b/src/sts/detectron2/checkpoint/__pycache__/c2_model_loading.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..431480f09939c7f9d7a98b15052733cc1fbfad06 Binary files /dev/null and b/src/sts/detectron2/checkpoint/__pycache__/c2_model_loading.cpython-38.pyc differ diff --git a/src/sts/detectron2/checkpoint/__pycache__/catalog.cpython-38.pyc b/src/sts/detectron2/checkpoint/__pycache__/catalog.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..edc6b545d4454c210025089b55f0122bee7bac31 Binary files /dev/null and b/src/sts/detectron2/checkpoint/__pycache__/catalog.cpython-38.pyc differ diff --git a/src/sts/detectron2/checkpoint/__pycache__/detection_checkpoint.cpython-38.pyc b/src/sts/detectron2/checkpoint/__pycache__/detection_checkpoint.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9086b3b6ed1abe9808a972c26ea940b5b4b5671 Binary files /dev/null and b/src/sts/detectron2/checkpoint/__pycache__/detection_checkpoint.cpython-38.pyc differ diff --git a/src/sts/detectron2/checkpoint/c2_model_loading.py b/src/sts/detectron2/checkpoint/c2_model_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8d181bd7200bd3fd38446e743f8f16780d6e76 --- /dev/null +++ b/src/sts/detectron2/checkpoint/c2_model_loading.py @@ -0,0 +1,407 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import re +from typing import Dict, List +import torch +from tabulate import tabulate + + +def convert_basic_c2_names(original_keys): + """ + Apply some basic name conversion to names in C2 weights. + It only deals with typical backbone models. + + Args: + original_keys (list[str]): + Returns: + list[str]: The same number of strings matching those in original_keys. 
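+
+    Illustrative example (derived from the substitutions below): a typical
+    backbone key such as "res2_0_branch2a_bn_s" is rewritten to
+    "res2.0.conv1.norm.weight".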
+ """ + layer_keys = copy.deepcopy(original_keys) + layer_keys = [ + {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys + ] # some hard-coded mappings + + layer_keys = [k.replace("_", ".") for k in layer_keys] + layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys] + layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys] + # Uniform both bn and gn names to "norm" + layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys] + + # stem + layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys] + # to avoid mis-matching with "conv1" in other components (e.g. detection head) + layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys] + + # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5) + # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys] + # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys] + # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys] + # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys] + + # blocks + layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys] + layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] + layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] + layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] + + # DensePose substitutions + layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys] + layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys] + layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys] + layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys] + layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys] + return layer_keys + + +def convert_c2_detectron_names(weights): + """ + Map Caffe2 Detectron weight names to Detectron2 names. 
+ + Args: + weights (dict): name -> tensor + + Returns: + dict: detectron2 names -> tensor + dict: detectron2 names -> C2 names + """ + logger = logging.getLogger(__name__) + logger.info("Renaming Caffe2 weights ......") + original_keys = sorted(weights.keys()) + layer_keys = copy.deepcopy(original_keys) + + layer_keys = convert_basic_c2_names(layer_keys) + + # -------------------------------------------------------------------------- + # RPN hidden representation conv + # -------------------------------------------------------------------------- + # FPN case + # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then + # shared for all other levels, hence the appearance of "fpn2" + layer_keys = [ + k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys + ] + # Non-FPN case + layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # RPN box transformation conv + # -------------------------------------------------------------------------- + # FPN case (see note above about "fpn2") + layer_keys = [ + k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas") + for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + # Non-FPN case + layer_keys = [ + k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + + # -------------------------------------------------------------------------- + # Fast R-CNN box head + # -------------------------------------------------------------------------- + layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys] + layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys] + layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys] + layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys] + # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s + layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # FPN lateral and output convolutions + # -------------------------------------------------------------------------- + def fpn_map(name): + """ + Look for keys with the following patterns: + 1) Starts with "fpn.inner." 
+ Example: "fpn.inner.res2.2.sum.lateral.weight" + Meaning: These are lateral pathway convolutions + 2) Starts with "fpn.res" + Example: "fpn.res2.2.sum.weight" + Meaning: These are FPN output convolutions + """ + splits = name.split(".") + norm = ".norm" if "norm" in splits else "" + if name.startswith("fpn.inner."): + # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight'] + stage = int(splits[2][len("res") :]) + return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1]) + elif name.startswith("fpn.res"): + # splits example: ['fpn', 'res2', '2', 'sum', 'weight'] + stage = int(splits[1][len("res") :]) + return "fpn_output{}{}.{}".format(stage, norm, splits[-1]) + return name + + layer_keys = [fpn_map(k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # Mask R-CNN mask head + # -------------------------------------------------------------------------- + # roi_heads.StandardROIHeads case + layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys] + layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys] + layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys] + # roi_heads.Res5ROIHeads case + layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Keypoint R-CNN head + # -------------------------------------------------------------------------- + # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX" + layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys] + layer_keys = [ + k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys + ] + layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Done with replacements + # -------------------------------------------------------------------------- + assert len(set(layer_keys)) == len(layer_keys) + assert len(original_keys) == len(layer_keys) + + new_weights = {} + new_keys_to_original_keys = {} + for orig, renamed in zip(original_keys, layer_keys): + new_keys_to_original_keys[renamed] = orig + if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."): + # remove the meaningless prediction weight for background class + new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1 + new_weights[renamed] = weights[orig][new_start_idx:] + logger.info( + "Remove prediction weight for background class in {}. The shape changes from " + "{} to {}.".format( + renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape) + ) + ) + elif renamed.startswith("cls_score."): + # move weights of bg class from original index 0 to last index + logger.info( + "Move classification weights for background class in {} from index 0 to " + "index {}.".format(renamed, weights[orig].shape[0] - 1) + ) + new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]]) + else: + new_weights[renamed] = weights[orig] + + return new_weights, new_keys_to_original_keys + + +# Note the current matching is not symmetric. +# it assumes model_state_dict will have longer names. 
+def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True): + """ + Match names between the two state-dict, and returns a new chkpt_state_dict with names + converted to match model_state_dict with heuristics. The returned dict can be later + loaded with fvcore checkpointer. + If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2 + model and will be renamed at first. + + Strategy: suppose that the models that we will create will have prefixes appended + to each of its keys, for example due to an extra level of nesting that the original + pre-trained weights from ImageNet won't contain. For example, model.state_dict() + might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains + res2.conv1.weight. We thus want to match both parameters together. + For that, we look for each model weight, look among all loaded keys if there is one + that is a suffix of the current weight name, and use it if that's the case. + If multiple matches exist, take the one with longest size + of the corresponding name. For example, for the same model as before, the pretrained + weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, + we want to match backbone[0].body.conv1.weight to conv1.weight, and + backbone[0].body.res2.conv1.weight to res2.conv1.weight. + """ + model_keys = sorted(model_state_dict.keys()) + if c2_conversion: + ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict) + # original_keys: the name in the original dict (before renaming) + else: + original_keys = {x: x for x in ckpt_state_dict.keys()} + ckpt_keys = sorted(ckpt_state_dict.keys()) + + def match(a, b): + # Matched ckpt_key should be a complete (starts with '.') suffix. + # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1, + # but matches whatever_conv1 or mesh_head.whatever_conv1. + return a == b or a.endswith("." + b) + + # get a matrix of string matches, where each (i, j) entry correspond to the size of the + # ckpt_key string, if it matches + match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys] + match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys)) + # use the matched one with longest size in case of multiple matches + max_match_size, idxs = match_matrix.max(1) + # remove indices that correspond to no-match + idxs[max_match_size == 0] = -1 + + logger = logging.getLogger(__name__) + # matched_pairs (matched checkpoint key --> matched model key) + matched_keys = {} + result_state_dict = {} + for idx_model, idx_ckpt in enumerate(idxs.tolist()): + if idx_ckpt == -1: + continue + key_model = model_keys[idx_model] + key_ckpt = ckpt_keys[idx_ckpt] + value_ckpt = ckpt_state_dict[key_ckpt] + shape_in_model = model_state_dict[key_model].shape + + if shape_in_model != value_ckpt.shape: + logger.warning( + "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( + key_ckpt, value_ckpt.shape, key_model, shape_in_model + ) + ) + logger.warning( + "{} will not be loaded. Please double check and see if this is desired.".format( + key_ckpt + ) + ) + continue + + assert key_model not in result_state_dict + result_state_dict[key_model] = value_ckpt + if key_ckpt in matched_keys: # already added to matched_keys + logger.error( + "Ambiguity found for {} in checkpoint!" 
+ "It matches at least two keys in the model ({} and {}).".format( + key_ckpt, key_model, matched_keys[key_ckpt] + ) + ) + raise ValueError("Cannot match one checkpoint key to multiple keys in the model.") + + matched_keys[key_ckpt] = key_model + + # logging: + matched_model_keys = sorted(matched_keys.values()) + if len(matched_model_keys) == 0: + logger.warning("No weights in checkpoint matched with model.") + return ckpt_state_dict + common_prefix = _longest_common_prefix(matched_model_keys) + rev_matched_keys = {v: k for k, v in matched_keys.items()} + original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys} + + model_key_groups = _group_keys_by_module(matched_model_keys, original_keys) + table = [] + memo = set() + for key_model in matched_model_keys: + if key_model in memo: + continue + if key_model in model_key_groups: + group = model_key_groups[key_model] + memo |= set(group) + shapes = [tuple(model_state_dict[k].shape) for k in group] + table.append( + ( + _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*", + _group_str([original_keys[k] for k in group]), + " ".join([str(x).replace(" ", "") for x in shapes]), + ) + ) + else: + key_checkpoint = original_keys[key_model] + shape = str(tuple(model_state_dict[key_model].shape)) + table.append((key_model[len(common_prefix) :], key_checkpoint, shape)) + table_str = tabulate( + table, tablefmt="pipe", headers=["Names in Model", "Names in Checkpoint", "Shapes"] + ) + logger.info( + "Following weights matched with " + + (f"submodule {common_prefix[:-1]}" if common_prefix else "model") + + ":\n" + + table_str + ) + + unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())] + for k in unmatched_ckpt_keys: + result_state_dict[k] = ckpt_state_dict[k] + return result_state_dict + + +def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]): + """ + Params in the same submodule are grouped together. + + Args: + keys: names of all parameters + original_names: mapping from parameter name to their name in the checkpoint + + Returns: + dict[name -> all other names in the same group] + """ + + def _submodule_name(key): + pos = key.rfind(".") + if pos < 0: + return None + prefix = key[: pos + 1] + return prefix + + all_submodules = [_submodule_name(k) for k in keys] + all_submodules = [x for x in all_submodules if x] + all_submodules = sorted(all_submodules, key=len) + + ret = {} + for prefix in all_submodules: + group = [k for k in keys if k.startswith(prefix)] + if len(group) <= 1: + continue + original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group]) + if len(original_name_lcp) == 0: + # don't group weights if original names don't share prefix + continue + + for k in group: + if k in ret: + continue + ret[k] = group + return ret + + +def _longest_common_prefix(names: List[str]) -> str: + """ + ["abc.zfg", "abc.zef"] -> "abc." + """ + names = [n.split(".") for n in names] + m1, m2 = min(names), max(names) + ret = [a for a, b in zip(m1, m2) if a == b] + ret = ".".join(ret) + "." 
if len(ret) else "" + return ret + + +def _longest_common_prefix_str(names: List[str]) -> str: + m1, m2 = min(names), max(names) + lcp = [a for a, b in zip(m1, m2) if a == b] + lcp = "".join(lcp) + return lcp + + +def _group_str(names: List[str]) -> str: + """ + Turn "common1", "common2", "common3" into "common{1,2,3}" + """ + lcp = _longest_common_prefix_str(names) + rest = [x[len(lcp) :] for x in names] + rest = "{" + ",".join(rest) + "}" + ret = lcp + rest + + # add some simplification for BN specifically + ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*") + ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*") + return ret diff --git a/src/sts/detectron2/checkpoint/catalog.py b/src/sts/detectron2/checkpoint/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..9a85736754a0de4550df96c22f38fc515bd02d71 --- /dev/null +++ b/src/sts/detectron2/checkpoint/catalog.py @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging + +from detectron2.utils.file_io import PathHandler, PathManager + + +class ModelCatalog(object): + """ + Store mappings from names to third-party models. + """ + + S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" + + # MSRA models have STRIDE_IN_1X1=True. False otherwise. + # NOTE: all BN models here have fused BN into an affine layer. + # As a result, you should only load them to a model with "FrozenBN". + # Loading them to a model with regular BN or SyncBN is wrong. + # Even when loaded to FrozenBN, it is still different from affine by an epsilon, + # which should be negligible for training. + # NOTE: all models here uses PIXEL_STD=[1,1,1] + # NOTE: Most of the BN models here are no longer used. We use the + # re-converted pre-trained models under detectron2 model zoo instead. 
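+    # Illustrative example of how these entries are resolved (see get() below):
+    #   ModelCatalog.get("ImageNetPretrained/MSRA/R-50")
+    #   -> "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl"
+    # "catalog://..." paths are routed through this class by the ModelCatalogHandler
+    # registered at the bottom of this file.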
+ C2_IMAGENET_MODELS = { + "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", + "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", + "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", + "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", + "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", + "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", + "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", + } + + C2_DETECTRON_PATH_FORMAT = ( + "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 + ) + + C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" + C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" + + # format: {model_name} -> part of the url + C2_DETECTRON_MODELS = { + "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 + "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 + "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 + "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 + "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 + "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 + "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 + "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 + "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 + "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 + "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 + "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 + "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 + } + + @staticmethod + def get(name): + if name.startswith("Caffe2Detectron/COCO"): + return ModelCatalog._get_c2_detectron_baseline(name) + if name.startswith("ImageNetPretrained/"): + return ModelCatalog._get_c2_imagenet_pretrained(name) + raise RuntimeError("model not present in the catalog: {}".format(name)) + + @staticmethod + def _get_c2_imagenet_pretrained(name): + prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX + name = name[len("ImageNetPretrained/") :] + name = ModelCatalog.C2_IMAGENET_MODELS[name] + url = "/".join([prefix, name]) + return url + + @staticmethod + def _get_c2_detectron_baseline(name): + name = name[len("Caffe2Detectron/COCO/") :] + url = ModelCatalog.C2_DETECTRON_MODELS[name] + if "keypoint_rcnn" in name: + dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS + else: + dataset = ModelCatalog.C2_DATASET_COCO + + if "35998355/rpn_R-50-C4_1x" in name: + # this one model is somehow different from others .. 
+ type = "rpn" + else: + type = "generalized_rcnn" + + # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. + url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( + prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset + ) + return url + + +class ModelCatalogHandler(PathHandler): + """ + Resolve URL like catalog://. + """ + + PREFIX = "catalog://" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path, **kwargs): + logger = logging.getLogger(__name__) + catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) + logger.info("Catalog entry {} points to {}".format(path, catalog_path)) + return PathManager.get_local_path(catalog_path, **kwargs) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +PathManager.register_handler(ModelCatalogHandler()) diff --git a/src/sts/detectron2/checkpoint/detection_checkpoint.py b/src/sts/detectron2/checkpoint/detection_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..da979ca72ae76eaee9a4478c01bffc8f58474a18 --- /dev/null +++ b/src/sts/detectron2/checkpoint/detection_checkpoint.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import pickle +from fvcore.common.checkpoint import Checkpointer + +import detectron2.utils.comm as comm +from detectron2.utils.file_io import PathManager + +from .c2_model_loading import align_and_update_state_dicts + + +class DetectionCheckpointer(Checkpointer): + """ + Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 + model zoo, and apply conversions for legacy models. + """ + + def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): + is_main_process = comm.is_main_process() + super().__init__( + model, + save_dir, + save_to_disk=is_main_process if save_to_disk is None else save_to_disk, + **checkpointables, + ) + self.path_manager = PathManager + + def _load_file(self, filename): + if filename.endswith(".pkl"): + with PathManager.open(filename, "rb") as f: + data = pickle.load(f, encoding="latin1") + if "model" in data and "__author__" in data: + # file is in Detectron2 model zoo format + self.logger.info("Reading a file from '{}'".format(data["__author__"])) + return data + else: + # assume file is from Caffe2 / Detectron1 model zoo + if "blobs" in data: + # Detection models have "blobs", but ImageNet models don't + data = data["blobs"] + data = {k: v for k, v in data.items() if not k.endswith("_momentum")} + return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} + + loaded = super()._load_file(filename) # load native pth checkpoint + if "model" not in loaded: + loaded = {"model": loaded} + return loaded + + def _load_model(self, checkpoint): + if checkpoint.get("matching_heuristics", False): + self._convert_ndarray_to_tensor(checkpoint["model"]) + # convert weights by name-matching heuristics + checkpoint["model"] = align_and_update_state_dicts( + self.model.state_dict(), + checkpoint["model"], + c2_conversion=checkpoint.get("__author__", None) == "Caffe2", + ) + # for non-caffe2 models, use standard ways to load it + incompatible = super()._load_model(checkpoint) + + model_buffers = dict(self.model.named_buffers(recurse=False)) + for k in ["pixel_mean", "pixel_std"]: + # Ignore missing key message about pixel_mean/std. 
+ # Though they may be missing in old checkpoints, they will be correctly + # initialized from config anyway. + if k in model_buffers: + try: + incompatible.missing_keys.remove(k) + except ValueError: + pass + return incompatible diff --git a/src/sts/detectron2/config/__init__.py b/src/sts/detectron2/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b3669f7ebe8fffc3539a10932ecccc128a8cc4b6 --- /dev/null +++ b/src/sts/detectron2/config/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .compat import downgrade_config, upgrade_config +from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable + +__all__ = [ + "CfgNode", + "get_cfg", + "global_cfg", + "set_global_cfg", + "downgrade_config", + "upgrade_config", + "configurable", +] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/detectron2/config/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/config/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8270152d38b3f39657074e1b19b8a0ad24460d3e Binary files /dev/null and b/src/sts/detectron2/config/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/config/__pycache__/compat.cpython-38.pyc b/src/sts/detectron2/config/__pycache__/compat.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..489c8e69aca704180f64afc07c5040cf0542d23d Binary files /dev/null and b/src/sts/detectron2/config/__pycache__/compat.cpython-38.pyc differ diff --git a/src/sts/detectron2/config/__pycache__/config.cpython-38.pyc b/src/sts/detectron2/config/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de06e89674e8a4a8c793d78566837b1084fad584 Binary files /dev/null and b/src/sts/detectron2/config/__pycache__/config.cpython-38.pyc differ diff --git a/src/sts/detectron2/config/__pycache__/defaults.cpython-38.pyc b/src/sts/detectron2/config/__pycache__/defaults.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f310ea92d39d68262c5d7b46fa4772811cde71a Binary files /dev/null and b/src/sts/detectron2/config/__pycache__/defaults.cpython-38.pyc differ diff --git a/src/sts/detectron2/config/compat.py b/src/sts/detectron2/config/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..11a08c439bf14defd880e37a938fab8a08e68eeb --- /dev/null +++ b/src/sts/detectron2/config/compat.py @@ -0,0 +1,229 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Backward compatibility of configs. + +Instructions to bump version: ++ It's not needed to bump version if new keys are added. + It's only needed when backward-incompatible changes happen + (i.e., some existing keys disappear, or the meaning of a key changes) ++ To bump version, do the following: + 1. Increment _C.VERSION in defaults.py + 2. Add a converter in this file. + + Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X, + and a function "downgrade" which in-place downgrades config from X to X-1 + + In each function, VERSION is left unchanged. + + Each converter assumes that its input has the relevant keys + (i.e., the input is not a partial config). + 3. Run the tests (test_config.py) to make sure the upgrade & downgrade + functions are consistent. 
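The version-bumping instructions above are easier to see with a concrete round trip through the two public helpers; a small sketch that only exercises the converters already defined in this file (no new converter is added):

```python
from detectron2.config import downgrade_config, get_cfg, upgrade_config

cfg = get_cfg()                                # defaults at the latest VERSION (2 here)
old_cfg = downgrade_config(cfg, to_version=1)  # restores v1 names, e.g. MODEL.WEIGHT
assert old_cfg.VERSION == 1 and "WEIGHT" in old_cfg.MODEL
new_cfg = upgrade_config(old_cfg)              # ConverterV2.upgrade renames the keys back
assert new_cfg.VERSION == cfg.VERSION and new_cfg.MODEL.WEIGHTS == cfg.MODEL.WEIGHTS
```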
+""" + +import logging +from typing import List, Optional, Tuple + +from .config import CfgNode as CN +from .defaults import _C + +__all__ = ["upgrade_config", "downgrade_config"] + + +def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN: + """ + Upgrade a config from its current version to a newer version. + + Args: + cfg (CfgNode): + to_version (int): defaults to the latest version. + """ + cfg = cfg.clone() + if to_version is None: + to_version = _C.VERSION + + assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version): + converter = globals()["ConverterV" + str(k + 1)] + converter.upgrade(cfg) + cfg.VERSION = k + 1 + return cfg + + +def downgrade_config(cfg: CN, to_version: int) -> CN: + """ + Downgrade a config from its current version to an older version. + + Args: + cfg (CfgNode): + to_version (int): + + Note: + A general downgrade of arbitrary configs is not always possible due to the + different functionalities in different versions. + The purpose of downgrade is only to recover the defaults in old versions, + allowing it to load an old partial yaml config. + Therefore, the implementation only needs to fill in the default values + in the old version when a general downgrade is not possible. + """ + cfg = cfg.clone() + assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version, -1): + converter = globals()["ConverterV" + str(k)] + converter.downgrade(cfg) + cfg.VERSION = k - 1 + return cfg + + +def guess_version(cfg: CN, filename: str) -> int: + """ + Guess the version of a partial config where the VERSION field is not specified. + Returns the version, or the latest if cannot make a guess. + + This makes it easier for users to migrate. + """ + logger = logging.getLogger(__name__) + + def _has(name: str) -> bool: + cur = cfg + for n in name.split("."): + if n not in cur: + return False + cur = cur[n] + return True + + # Most users' partial configs have "MODEL.WEIGHT", so guess on it + ret = None + if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"): + ret = 1 + + if ret is not None: + logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret)) + else: + ret = _C.VERSION + logger.warning( + "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format( + filename, ret + ) + ) + return ret + + +def _rename(cfg: CN, old: str, new: str) -> None: + old_keys = old.split(".") + new_keys = new.split(".") + + def _set(key_seq: List[str], val: str) -> None: + cur = cfg + for k in key_seq[:-1]: + if k not in cur: + cur[k] = CN() + cur = cur[k] + cur[key_seq[-1]] = val + + def _get(key_seq: List[str]) -> CN: + cur = cfg + for k in key_seq: + cur = cur[k] + return cur + + def _del(key_seq: List[str]) -> None: + cur = cfg + for k in key_seq[:-1]: + cur = cur[k] + del cur[key_seq[-1]] + if len(cur) == 0 and len(key_seq) > 1: + _del(key_seq[:-1]) + + _set(new_keys, _get(old_keys)) + _del(old_keys) + + +class _RenameConverter: + """ + A converter that handles simple rename. 
+ """ + + RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name) + + @classmethod + def upgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME: + _rename(cfg, old, new) + + @classmethod + def downgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME[::-1]: + _rename(cfg, new, old) + + +class ConverterV1(_RenameConverter): + RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")] + + +class ConverterV2(_RenameConverter): + """ + A large bulk of rename, before public release. + """ + + RENAME = [ + ("MODEL.WEIGHT", "MODEL.WEIGHTS"), + ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"), + ( + "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT", + "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH", + ), + ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"), + ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"), + ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"), + ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"), + ("TEST.AUG_ON", "TEST.AUG.ENABLED"), + ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"), + ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"), + ("TEST.AUG_FLIP", "TEST.AUG.FLIP"), + ] + + @classmethod + def upgrade(cls, cfg: CN) -> None: + super().upgrade(cfg) + + if cfg.MODEL.META_ARCHITECTURE == "RetinaNet": + _rename( + cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS" + ) + _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"] + else: + _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS") + _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"] + + @classmethod + def downgrade(cls, cfg: CN) -> None: + super().downgrade(cfg) + + _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS") + _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES") + cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS + cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES + cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version diff --git a/src/sts/detectron2/config/config.py b/src/sts/detectron2/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..5c574b4805c7e0e0a0d0aeb9ca49ca51a2f18c44 --- /dev/null +++ b/src/sts/detectron2/config/config.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
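One practical note before the `CfgNode` implementation that begins below: thanks to the `_RenameConverter` base above, a future backward-incompatible rename would only need a tiny subclass in `compat.py` plus a bump of `_C.VERSION` in `defaults.py`. A purely hypothetical sketch (neither `ConverterV3` nor these key names exist in the codebase):

```python
# Hypothetical converter that would sit next to ConverterV1/ConverterV2 in compat.py.
class ConverterV3(_RenameConverter):
    RENAME = [("MODEL.SOME_HEAD.OLD_KEY", "MODEL.SOME_HEAD.NEW_KEY")]  # imaginary rename
```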
+ +import functools +import inspect +import logging +from fvcore.common.config import CfgNode as _CfgNode + +from detectron2.utils.file_io import PathManager + + +class CfgNode(_CfgNode): + """ + The same as `fvcore.common.config.CfgNode`, but different in: + + 1. Use unsafe yaml loading by default. + Note that this may lead to arbitrary code execution: you must not + load a config file from untrusted sources before manually inspecting + the content of the file. + 2. Support config versioning. + When attempting to merge an old config, it will convert the old config automatically. + """ + + @classmethod + def _open_cfg(cls, filename): + return PathManager.open(filename, "r") + + # Note that the default value of allow_unsafe is changed to True + def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: + assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!" + loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) + loaded_cfg = type(self)(loaded_cfg) + + # defaults.py needs to import CfgNode + from .defaults import _C + + latest_ver = _C.VERSION + assert ( + latest_ver == self.VERSION + ), "CfgNode.merge_from_file is only allowed on a config object of latest version!" + + logger = logging.getLogger(__name__) + + loaded_ver = loaded_cfg.get("VERSION", None) + if loaded_ver is None: + from .compat import guess_version + + loaded_ver = guess_version(loaded_cfg, cfg_filename) + assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( + loaded_ver, self.VERSION + ) + + if loaded_ver == self.VERSION: + self.merge_from_other_cfg(loaded_cfg) + else: + # compat.py needs to import CfgNode + from .compat import upgrade_config, downgrade_config + + logger.warning( + "Loading an old v{} config file '{}' by automatically upgrading to v{}. " + "See docs/CHANGELOG.md for instructions to update your files.".format( + loaded_ver, cfg_filename, self.VERSION + ) + ) + # To convert, first obtain a full config at an old version + old_self = downgrade_config(self, to_version=loaded_ver) + old_self.merge_from_other_cfg(loaded_cfg) + new_config = upgrade_config(old_self) + self.clear() + self.update(new_config) + + def dump(self, *args, **kwargs): + """ + Returns: + str: a yaml string representation of the config + """ + # to make it show up in docs + return super().dump(*args, **kwargs) + + +global_cfg = CfgNode() + + +def get_cfg() -> CfgNode: + """ + Get a copy of the default config. + + Returns: + a detectron2 CfgNode instance. + """ + from .defaults import _C + + return _C.clone() + + +def set_global_cfg(cfg: CfgNode) -> None: + """ + Let the global config point to the given cfg. + + Assume that the given "cfg" has the key "KEY", after calling + `set_global_cfg(cfg)`, the key can be accessed by: + :: + from detectron2.config import global_cfg + print(global_cfg.KEY) + + By using a hacky global config, you can access these configs anywhere, + without having to pass the config object or the values deep into the code. + This is a hacky feature introduced for quick prototyping / research exploration. + """ + global global_cfg + global_cfg.clear() + global_cfg.update(cfg) + + +def configurable(init_func=None, *, from_config=None): + """ + Decorate a function or a class's __init__ method so that it can be called + with a :class:`CfgNode` object using a :func:`from_config` function that translates + :class:`CfgNode` to arguments. 
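As a quick illustration of the loading path implemented by `merge_from_file` above (the yaml path, learning rate, and weights path are placeholders, not files that ship with this repository):

```python
from detectron2.config import get_cfg

cfg = get_cfg()                                    # full defaults at the latest VERSION
cfg.merge_from_file("configs/my_experiment.yaml")  # older configs are auto-upgraded on merge
cfg.merge_from_list(["SOLVER.BASE_LR", 0.0025, "MODEL.WEIGHTS", "path/to/model_final.pth"])
cfg.freeze()
```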
+ + Examples: + :: + # Usage 1: Decorator on __init__: + class A: + @configurable + def __init__(self, a, b=2, c=3): + pass + + @classmethod + def from_config(cls, cfg): # 'cfg' must be the first argument + # Returns kwargs to be passed to __init__ + return {"a": cfg.A, "b": cfg.B} + + a1 = A(a=1, b=2) # regular construction + a2 = A(cfg) # construct with a cfg + a3 = A(cfg, b=3, c=4) # construct with extra overwrite + + # Usage 2: Decorator on any function. Needs an extra from_config argument: + @configurable(from_config=lambda cfg: {"a: cfg.A, "b": cfg.B}) + def a_func(a, b=2, c=3): + pass + + a1 = a_func(a=1, b=2) # regular call + a2 = a_func(cfg) # call with a cfg + a3 = a_func(cfg, b=3, c=4) # call with extra overwrite + + Args: + init_func (callable): a class's ``__init__`` method in usage 1. The + class must have a ``from_config`` classmethod which takes `cfg` as + the first argument. + from_config (callable): the from_config function in usage 2. It must take `cfg` + as its first argument. + """ + + if init_func is not None: + assert ( + inspect.isfunction(init_func) + and from_config is None + and init_func.__name__ == "__init__" + ), "Incorrect use of @configurable. Check API documentation for examples." + + @functools.wraps(init_func) + def wrapped(self, *args, **kwargs): + try: + from_config_func = type(self).from_config + except AttributeError as e: + raise AttributeError( + "Class with @configurable must have a 'from_config' classmethod." + ) from e + if not inspect.ismethod(from_config_func): + raise TypeError("Class with @configurable must have a 'from_config' classmethod.") + + if _called_with_cfg(*args, **kwargs): + explicit_args = _get_args_from_config(from_config_func, *args, **kwargs) + init_func(self, **explicit_args) + else: + init_func(self, *args, **kwargs) + + return wrapped + + else: + if from_config is None: + return configurable # @configurable() is made equivalent to @configurable + assert inspect.isfunction( + from_config + ), "from_config argument of configurable must be a function!" + + def wrapper(orig_func): + @functools.wraps(orig_func) + def wrapped(*args, **kwargs): + if _called_with_cfg(*args, **kwargs): + explicit_args = _get_args_from_config(from_config, *args, **kwargs) + return orig_func(**explicit_args) + else: + return orig_func(*args, **kwargs) + + return wrapped + + return wrapper + + +def _get_args_from_config(from_config_func, *args, **kwargs): + """ + Use `from_config` to obtain explicit arguments. 
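To make the decorator mechanics above concrete, here is a small self-contained sketch; the `Head` class is an illustration (not a detectron2 class), while the config keys it reads are the real ROI_HEADS defaults defined later in `defaults.py`:

```python
from detectron2.config import configurable, get_cfg

class Head:
    @configurable
    def __init__(self, num_classes, score_thresh=0.05):
        self.num_classes = num_classes
        self.score_thresh = score_thresh

    @classmethod
    def from_config(cls, cfg):  # translates a CfgNode into __init__ kwargs
        return {
            "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
            "score_thresh": cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST,
        }

cfg = get_cfg()
h1 = Head(num_classes=3)           # plain construction, cfg never consulted
h2 = Head(cfg, score_thresh=0.25)  # cfg-driven; the extra kwarg overrides from_config's value
```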
+ + Returns: + dict: arguments to be used for cls.__init__ + """ + signature = inspect.signature(from_config_func) + if list(signature.parameters.keys())[0] != "cfg": + if inspect.isfunction(from_config_func): + name = from_config_func.__name__ + else: + name = f"{from_config_func.__self__}.from_config" + raise TypeError(f"{name} must take 'cfg' as the first argument!") + support_var_arg = any( + param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] + for param in signature.parameters.values() + ) + if support_var_arg: # forward all arguments to from_config, if from_config accepts them + ret = from_config_func(*args, **kwargs) + else: + # forward supported arguments to from_config + supported_arg_names = set(signature.parameters.keys()) + extra_kwargs = {} + for name in list(kwargs.keys()): + if name not in supported_arg_names: + extra_kwargs[name] = kwargs.pop(name) + ret = from_config_func(*args, **kwargs) + # forward the other arguments to __init__ + ret.update(extra_kwargs) + return ret + + +def _called_with_cfg(*args, **kwargs): + """ + Returns: + bool: whether the arguments contain CfgNode and should be considered + forwarded to from_config. + """ + from omegaconf import DictConfig + + if len(args) and isinstance(args[0], (_CfgNode, DictConfig)): + return True + if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)): + return True + # `from_config`'s first argument is forced to be "cfg". + # So the above check covers all cases. + return False diff --git a/src/sts/detectron2/config/defaults.py b/src/sts/detectron2/config/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..a334c14dc765e20b4d95cea8e1ff990193c80af6 --- /dev/null +++ b/src/sts/detectron2/config/defaults.py @@ -0,0 +1,628 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .config import CfgNode as CN + +# ----------------------------------------------------------------------------- +# Convention about Training / Test specific parameters +# ----------------------------------------------------------------------------- +# Whenever an argument can be either used for training or for testing, the +# corresponding name will be post-fixed by a _TRAIN for a training parameter, +# or _TEST for a test-specific parameter. +# For example, the number of images during training will be +# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be +# IMAGES_PER_BATCH_TEST + +# ----------------------------------------------------------------------------- +# Config definition +# ----------------------------------------------------------------------------- + +_C = CN() + +# The version number, to upgrade from old configs to new ones if any +# changes happen. It's recommended to keep a VERSION in your config file. +_C.VERSION = 2 + +_C.MODEL = CN() +_C.MODEL.LOAD_PROPOSALS = False +_C.MODEL.MASK_ON = False +_C.MODEL.KEYPOINT_ON = False +_C.MODEL.DEVICE = "cuda" +_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" + +# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file +# to be loaded to the model. You can find available models in the model zoo. +_C.MODEL.WEIGHTS = "" + +# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). +# To train on images of different number of channels, just set different mean & std. 
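Tying the last sentence of that comment to code: training on inputs with a different number of channels only requires the two lists to grow together (the fourth-channel statistics below are made-up placeholders):

```python
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675, 90.0]  # hypothetical 4th-channel mean
cfg.MODEL.PIXEL_STD = [57.375, 57.120, 58.395, 50.0]      # per-channel std for non-MSRA weights
```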
+# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] +_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675] +# When using pre-trained models in Detectron1 or any MSRA models, +# std has been absorbed into its conv1 weights, so the std needs to be set to 1. +# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) +_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0] + + +# ----------------------------------------------------------------------------- +# INPUT +# ----------------------------------------------------------------------------- +_C.INPUT = CN() +# Size of the smallest side of the image during training +_C.INPUT.MIN_SIZE_TRAIN = (800,) +# Sample size of smallest side by choice or random selection from range given by +# INPUT.MIN_SIZE_TRAIN +_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" +# Maximum size of the side of the image during training +_C.INPUT.MAX_SIZE_TRAIN = 1333 +# Size of the smallest side of the image during testing. Set to zero to disable resize in testing. +_C.INPUT.MIN_SIZE_TEST = 800 +# Maximum size of the side of the image during testing +_C.INPUT.MAX_SIZE_TEST = 1333 +# Mode for flipping images used in data augmentation during training +# choose one of ["horizontal", "vertical", "none"] +_C.INPUT.RANDOM_FLIP = "horizontal" + +# `True` if cropping is used for data augmentation during training +_C.INPUT.CROP = CN({"ENABLED": False}) +# Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation. +_C.INPUT.CROP.TYPE = "relative_range" +# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of +# pixels if CROP.TYPE is "absolute" +_C.INPUT.CROP.SIZE = [0.9, 0.9] +_C.INPUT.CROP.CROP_INSTANCE = False + +# Whether the model needs RGB, YUV, HSV etc. +# Should be one of the modes defined here, as we use PIL to read the image: +# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes +# with BGR being the one exception. One can set image format to BGR; we will +# internally use RGB for conversion and flip the channels over +_C.INPUT.FORMAT = "BGR" +# The ground truth mask format that the model will use. +# Mask R-CNN supports either "polygon" or "bitmask" as ground truth. +_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask" + + +# ----------------------------------------------------------------------------- +# Dataset +# ----------------------------------------------------------------------------- +_C.DATASETS = CN() +# List of the dataset names for training. Must be registered in DatasetCatalog +# Samples from these datasets will be merged and used as one dataset. +_C.DATASETS.TRAIN = () +# List of the pre-computed proposal files for training, which must be consistent +# with datasets listed in DATASETS.TRAIN. +_C.DATASETS.PROPOSAL_FILES_TRAIN = () +# Number of top scoring precomputed proposals to keep for training +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000 +# List of the dataset names for testing. Must be registered in DatasetCatalog +_C.DATASETS.TEST = () +# List of the pre-computed proposal files for test, which must be consistent +# with datasets listed in DATASETS.TEST.
+_C.DATASETS.PROPOSAL_FILES_TEST = () +# Number of top scoring precomputed proposals to keep for test +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000 + +# ----------------------------------------------------------------------------- +# DataLoader +# ----------------------------------------------------------------------------- +_C.DATALOADER = CN() +# Number of data loading threads +_C.DATALOADER.NUM_WORKERS = 4 +# If True, each batch should contain only images for which the aspect ratio +# is compatible. This groups portrait images together, and landscape images +# are not batched with portrait images. +_C.DATALOADER.ASPECT_RATIO_GROUPING = True +# Options: TrainingSampler, RepeatFactorTrainingSampler +_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler" +# Repeat threshold for RepeatFactorTrainingSampler +_C.DATALOADER.REPEAT_THRESHOLD = 0.0 +# If True, when working on datasets that have instance annotations, the +# training dataloader will filter out images without associated annotations +_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True + +# ---------------------------------------------------------------------------- # +# Backbone options +# ---------------------------------------------------------------------------- # +_C.MODEL.BACKBONE = CN() + +_C.MODEL.BACKBONE.NAME = "build_resnet_backbone" +# Freeze the first several stages so they are not trained. +# There are 5 stages in ResNet. The first is a convolution, and the following +# stages are each a group of residual blocks. +_C.MODEL.BACKBONE.FREEZE_AT = 2 + + +# ---------------------------------------------------------------------------- # +# FPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.FPN = CN() +# Names of the input feature maps to be used by FPN +# They must have contiguous power of 2 strides +# e.g., ["res2", "res3", "res4", "res5"] +_C.MODEL.FPN.IN_FEATURES = [] +_C.MODEL.FPN.OUT_CHANNELS = 256 + +# Options: "" (no norm), "GN" +_C.MODEL.FPN.NORM = "" + +# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" +_C.MODEL.FPN.FUSE_TYPE = "sum" + + +# ---------------------------------------------------------------------------- # +# Proposal generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.PROPOSAL_GENERATOR = CN() +# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals" +_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" +# Proposal height and width both need to be greater than MIN_SIZE +# (at the scale used during training or inference) +_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0 + + +# ---------------------------------------------------------------------------- # +# Anchor generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.ANCHOR_GENERATOR = CN() +# The generator can be any name in the ANCHOR_GENERATOR registry +_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" +# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input. +# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for +# IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1. +# When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]] +# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect +# ratios are generated by an anchor generator. +# Format: list[list[float]].
ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W) +# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true, +# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used +# for all IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]] +# Anchor angles. +# list[list[float]], the angle in degrees, for each input feature map. +# ANGLES[i] specifies the list of angles for IN_FEATURES[i]. +_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]] +# Relative offset between the center of the first anchor and the top-left corner of the image +# Value has to be in [0, 1). Recommend to use 0.5, which means half stride. +# The value is not expected to affect model accuracy. +_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0 + +# ---------------------------------------------------------------------------- # +# RPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.RPN = CN() +_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY + +# Names of the input feature maps to be used by RPN +# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN +_C.MODEL.RPN.IN_FEATURES = ["res4"] +# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels +# Set to -1 or a large value, e.g. 100000, to disable pruning anchors +_C.MODEL.RPN.BOUNDARY_THRESH = -1 +# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD] +# Minimum overlap required between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD +# ==> positive RPN example: 1) +# Maximum overlap allowed between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD +# ==> negative RPN example: 0) +# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD) +# are ignored (-1) +_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7] +_C.MODEL.RPN.IOU_LABELS = [0, -1, 1] +# Number of regions per image used to train RPN +_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 +# Target fraction of foreground (positive) examples per RPN minibatch +_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 +# Options are: "smooth_l1", "giou" +_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1" +_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0 +# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets +_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0 +_C.MODEL.RPN.LOSS_WEIGHT = 1.0 +# Number of top scoring RPN proposals to keep before applying NMS +# When FPN is used, this is *per FPN level* (not total) +_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000 +_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 +# Number of top scoring RPN proposals to keep after applying NMS +# When FPN is used, this limit is applied per level and then again to the union +# of proposals from all levels +# NOTE: When FPN is used, the meaning of this config is different from Detectron1. +# It means per-batch topk in Detectron1, but per-image topk here. +# See the "find_top_rpn_proposals" function for details. +_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000 +_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000 +# NMS threshold used on RPN proposals +_C.MODEL.RPN.NMS_THRESH = 0.7 +# Set this to -1 to use the same number of output channels as input channels. 
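Before the RPN block closes with its CONV_DIMS setting below, a short sketch of how an IOU_THRESHOLDS / IOU_LABELS pair like the one above is interpreted. It uses detectron2's `Matcher` utility, which lives elsewhere in the library (`detectron2/modeling/matcher.py`), so treat the import path as an assumption about this vendored copy:

```python
import torch
from detectron2.modeling.matcher import Matcher

# Same thresholds/labels as MODEL.RPN.IOU_THRESHOLDS / MODEL.RPN.IOU_LABELS above.
anchor_matcher = Matcher([0.3, 0.7], [0, -1, 1], allow_low_quality_matches=True)
iou = torch.tensor([[0.1, 0.5, 0.9]])      # 1 ground-truth box vs. 3 anchors
matched_idxs, labels = anchor_matcher(iou)
print(labels.tolist())                     # [0, -1, 1]: negative, ignored, positive
```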
+_C.MODEL.RPN.CONV_DIMS = [-1] + +# ---------------------------------------------------------------------------- # +# ROI HEADS options +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_HEADS = CN() +_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads" +# Number of foreground classes +_C.MODEL.ROI_HEADS.NUM_CLASSES = 80 +# Names of the input feature maps to be used by ROI heads +# Currently all heads (box, mask, ...) use the same input feature map list +# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN +_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"] +# IOU overlap ratios [IOU_THRESHOLD] +# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD) +# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD) +_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] +_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1] +# RoI minibatch size *per image* (number of regions of interest [ROIs]) +# Total number of RoIs per training minibatch = +# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH +# E.g., a common configuration is: 512 * 16 = 8192 +_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 +# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0) +_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 + +# Only used on test mode + +# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to +# balance obtaining high recall with not having too many low precision +# detections that will slow down inference post processing steps (like NMS) +# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down +# inference. +_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 +# If True, augment proposals with ground-truth boxes before sampling proposals to +# train ROI heads. +_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True + +# ---------------------------------------------------------------------------- # +# Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_HEAD = CN() +# C4 don't use head name option +# Options for non-C4 models: FastRCNNConvFCHead, +_C.MODEL.ROI_BOX_HEAD.NAME = "" +# Options are: "smooth_l1", "giou" +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1" +# The final scaling coefficient on the box regression loss, used to balance the magnitude of its +# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`. +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0 +# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets +# These are empirically chosen to approximately lead to unit variance targets +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0 +_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" + +_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0 +# Hidden layer dimension for FC layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024 +_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0 +# Channel dimension for Conv layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. 
+# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_BOX_HEAD.NORM = "" +# Whether to use class agnostic for bbox regression +_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False +# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes. +_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False + +# ---------------------------------------------------------------------------- # +# Cascaded Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_CASCADE_HEAD = CN() +# The number of cascade stages is implicitly defined by the length of the following two configs. +_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = ( + (10.0, 10.0, 5.0, 5.0), + (20.0, 20.0, 10.0, 10.0), + (30.0, 30.0, 15.0, 15.0), +) +_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7) + + +# ---------------------------------------------------------------------------- # +# Mask Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_MASK_HEAD = CN() +_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead" +_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head +_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. +# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_MASK_HEAD.NORM = "" +# Whether to use class agnostic for mask prediction +_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2" + + +# ---------------------------------------------------------------------------- # +# Keypoint Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_KEYPOINT_HEAD = CN() +_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead" +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8)) +_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO. + +# Images with too few (or no) keypoints are excluded from training. +_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1 +# Normalize by the total number of visible keypoints in the minibatch if True. +# Otherwise, normalize by the total number of keypoints that could ever exist +# in the minibatch. +# The keypoint softmax loss is only calculated on visible keypoints. +# Since the number of visible keypoints can vary significantly between +# minibatches, this has the effect of up-weighting the importance of +# minibatches with few visible keypoints. (Imagine the extreme case of +# only one visible keypoint versus N: in the case of N, each one +# contributes 1/N to the gradient compared to the single keypoint +# determining the gradient direction). Instead, we can normalize the +# loss by the total number of keypoints, if it were the case that all +# keypoints were visible in a full minibatch. (Returning to the example, +# this means that the one visible keypoint contributes as much as each +# of the N keypoints.) 
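A tiny worked example of the trade-off described in the comment above (all numbers are made up for illustration):

```python
loss_sum = 12.0          # summed keypoint softmax loss over the visible keypoints
num_visible = 4          # only 4 keypoints happen to be visible in this minibatch
num_possible = 17 * 64   # NUM_KEYPOINTS * number of sampled person RoIs

by_visible = loss_sum / num_visible    # 3.0    -> sparse minibatches keep a strong signal
by_possible = loss_sum / num_possible  # ~0.011 -> sparse minibatches are heavily down-weighted
print(by_visible, by_possible)
```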
+_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True +# Multi-task loss weight to use for keypoints +# Recommended values: +# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True +# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False +_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2" + +# ---------------------------------------------------------------------------- # +# Semantic Segmentation Head +# ---------------------------------------------------------------------------- # +_C.MODEL.SEM_SEG_HEAD = CN() +_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead" +_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] +# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for +# the corresponding pixel. +_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255 +# Number of classes in the semantic segmentation head +_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54 +# Number of channels in the 3x3 convs inside semantic-FPN heads. +_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128 +# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. +_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4 +# Normalization method for the convolution layers. Options: "" (no norm), "GN". +_C.MODEL.SEM_SEG_HEAD.NORM = "GN" +_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0 + +_C.MODEL.PANOPTIC_FPN = CN() +# Scaling of all losses from instance detection / segmentation head. +_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0 + +# options when combining instance & semantic segmentation outputs +_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used +_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5 +_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096 +_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5 + + +# ---------------------------------------------------------------------------- # +# RetinaNet Head +# ---------------------------------------------------------------------------- # +_C.MODEL.RETINANET = CN() + +# This is the number of foreground classes. +_C.MODEL.RETINANET.NUM_CLASSES = 80 + +_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] + +# Convolutions to use in the cls and bbox tower +# NOTE: this doesn't include the last conv for logits +_C.MODEL.RETINANET.NUM_CONVS = 4 + +# IoU overlap ratio [bg, fg] for labeling anchors. +# Anchors with < bg are labeled negative (0) +# Anchors with >= bg and < fg are ignored (-1) +# Anchors with >= fg are labeled positive (1) +_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5] +_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1] + +# Prior prob for rare case (i.e. foreground) at the beginning of training. +# This is used to set the bias for the logits layer of the classifier subnet. +# This improves training stability in the case of heavy class imbalance.
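The PRIOR_PROB value set just below is turned into an initial bias for the classification logits; the inverse-sigmoid formula here is the standard RetinaNet initialization that this option controls, shown as a hedged sketch rather than a quote of the model code:

```python
import math

prior_prob = 0.01                                     # _C.MODEL.RETINANET.PRIOR_PROB
bias_init = -math.log((1 - prior_prob) / prior_prob)  # ~ -4.595
# sigmoid(bias_init) == prior_prob, so at step 0 every anchor predicts ~1% foreground,
# which keeps early training from being swamped by the huge number of background anchors.
assert abs(1 / (1 + math.exp(-bias_init)) - prior_prob) < 1e-9
```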
+_C.MODEL.RETINANET.PRIOR_PROB = 0.01 + +# Inference cls score threshold, only anchors with score > INFERENCE_TH are +# considered for inference (to improve speed) +_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05 +# Select topk candidates before NMS +_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000 +_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5 + +# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets +_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) + +# Loss parameters +_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0 +_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25 +_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1 +# Options are: "smooth_l1", "giou" +_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1" + +# One of BN, SyncBN, FrozenBN, GN +# Only supports GN until unshared norm is implemented +_C.MODEL.RETINANET.NORM = "" + + +# ---------------------------------------------------------------------------- # +# ResNe[X]t options (ResNets = {ResNet, ResNeXt} +# Note that parts of a resnet may be used for both the backbone and the head +# These options apply to both +# ---------------------------------------------------------------------------- # +_C.MODEL.RESNETS = CN() + +_C.MODEL.RESNETS.DEPTH = 50 +_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone + +# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt +_C.MODEL.RESNETS.NUM_GROUPS = 1 + +# Options: FrozenBN, GN, "SyncBN", "BN" +_C.MODEL.RESNETS.NORM = "FrozenBN" + +# Baseline width of each group. +# Scaling this parameters will scale the width of all bottleneck layers. +_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 + +# Place the stride 2 conv on the 1x1 filter +# Use True only for the original MSRA ResNet; use False for C2 and Torch models +_C.MODEL.RESNETS.STRIDE_IN_1X1 = True + +# Apply dilation in stage "res5" +_C.MODEL.RESNETS.RES5_DILATION = 1 + +# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet +# For R18 and R34, this needs to be set to 64 +_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 +_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 + +# Apply Deformable Convolution in stages +# Specify if apply deform_conv on Res2, Res3, Res4, Res5 +_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False] +# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168); +# Use False for DeformableV1. +_C.MODEL.RESNETS.DEFORM_MODULATED = False +# Number of groups in deformable conv. +_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1 + + +# ---------------------------------------------------------------------------- # +# Solver +# ---------------------------------------------------------------------------- # +_C.SOLVER = CN() + +# See detectron2/solver/build.py for LR scheduler options +_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" + +_C.SOLVER.MAX_ITER = 40000 + +_C.SOLVER.BASE_LR = 0.001 + +_C.SOLVER.MOMENTUM = 0.9 + +_C.SOLVER.NESTEROV = False + +_C.SOLVER.WEIGHT_DECAY = 0.0001 +# The weight decay that's applied to parameters of normalization layers +# (typically the affine transformation) +_C.SOLVER.WEIGHT_DECAY_NORM = 0.0 + +_C.SOLVER.GAMMA = 0.1 +# The iteration number to decrease learning rate by GAMMA. +_C.SOLVER.STEPS = (30000,) + +_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000 +_C.SOLVER.WARMUP_ITERS = 1000 +_C.SOLVER.WARMUP_METHOD = "linear" + +# Save a checkpoint after every this number of iterations +_C.SOLVER.CHECKPOINT_PERIOD = 5000 + +# Number of images per batch across all machines. 
This is also the number +# of training images per step (i.e. per iteration). If we use 16 GPUs +# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch. +# May be adjusted automatically if REFERENCE_WORLD_SIZE is set. +_C.SOLVER.IMS_PER_BATCH = 16 + +# The reference number of workers (GPUs) this config is meant to train with. +# It takes no effect when set to 0. +# With a non-zero value, it will be used by DefaultTrainer to compute a desired +# per-worker batch size, and then scale the other related configs (total batch size, +# learning rate, etc) to match the per-worker batch size. +# See documentation of `DefaultTrainer.auto_scale_workers` for details: +_C.SOLVER.REFERENCE_WORLD_SIZE = 0 + +# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for +# biases. This is not useful (at least for recent models). You should avoid +# changing these and they exist only to reproduce Detectron v1 training if +# desired. +_C.SOLVER.BIAS_LR_FACTOR = 1.0 +_C.SOLVER.WEIGHT_DECAY_BIAS = _C.SOLVER.WEIGHT_DECAY + +# Gradient clipping +_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False}) +# Type of gradient clipping, currently 2 values are supported: +# - "value": the absolute values of elements of each gradients are clipped +# - "norm": the norm of the gradient for each parameter is clipped thus +# affecting all elements in the parameter +_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value" +# Maximum absolute value used for clipping gradients +_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0 +# Floating point number p for L-p norm to be used with the "norm" +# gradient clipping type; for L-inf, please specify .inf +_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0 + +# Enable automatic mixed precision for training +# Note that this does not change model's inference behavior. +# To use AMP in inference, run inference under autocast() +_C.SOLVER.AMP = CN({"ENABLED": False}) + +# ---------------------------------------------------------------------------- # +# Specific test options +# ---------------------------------------------------------------------------- # +_C.TEST = CN() +# For end-to-end tests to verify the expected accuracy. +# Each item is [task, metric, value, tolerance] +# e.g.: [['bbox', 'AP', 38.5, 0.2]] +_C.TEST.EXPECTED_RESULTS = [] +# The period (in terms of steps) to evaluate the model during training. +# Set to 0 to disable. +_C.TEST.EVAL_PERIOD = 0 +# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval +# When empty, it will use the defaults in COCO. +# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. +_C.TEST.KEYPOINT_OKS_SIGMAS = [] +# Maximum number of detections to return per image during inference (100 is +# based on the limit established for the COCO dataset). +_C.TEST.DETECTIONS_PER_IMAGE = 100 + +_C.TEST.AUG = CN({"ENABLED": False}) +_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) +_C.TEST.AUG.MAX_SIZE = 4000 +_C.TEST.AUG.FLIP = True + +_C.TEST.PRECISE_BN = CN({"ENABLED": False}) +_C.TEST.PRECISE_BN.NUM_ITER = 200 + +# ---------------------------------------------------------------------------- # +# Misc options +# ---------------------------------------------------------------------------- # +# Directory where output files are written +_C.OUTPUT_DIR = "./output" +# Set seed to negative to fully randomize everything. +# Set seed to positive to use a fixed seed. Note that a fixed seed increases +# reproducibility but does not guarantee fully deterministic behavior. 
+# Disabling all parallelism further increases reproducibility. +_C.SEED = -1 +# Benchmark different cudnn algorithms. +# If input images have very different sizes, this option will have large overhead +# for about 10k iterations. It usually hurts total time, but can benefit for certain models. +# If input images have the same or similar sizes, benchmark is often helpful. +_C.CUDNN_BENCHMARK = False +# The period (in terms of steps) for minibatch visualization at train time. +# Set to 0 to disable. +_C.VIS_PERIOD = 0 + +# global config is for quick hack purposes. +# You can set them in command line or config files, +# and access it with: +# +# from detectron2.config import global_cfg +# print(global_cfg.HACK) +# +# Do not commit any configs into it. +_C.GLOBAL = CN() +_C.GLOBAL.HACK = 1.0 diff --git a/src/sts/detectron2/config/instantiate.py b/src/sts/detectron2/config/instantiate.py new file mode 100644 index 0000000000000000000000000000000000000000..9e02a2c526445ba2aa18396181cee966c548dc12 --- /dev/null +++ b/src/sts/detectron2/config/instantiate.py @@ -0,0 +1,110 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import dataclasses +import logging +from collections import abc +from typing import Any +from omegaconf import DictConfig + +from detectron2.utils.registry import _convert_target_to_string, locate + +__all__ = ["dump_dataclass", "instantiate"] + + +def dump_dataclass(obj: Any): + """ + Dump a dataclass recursively into a dict that can be later instantiated. + + Args: + obj: a dataclass object + + Returns: + dict + """ + assert dataclasses.is_dataclass(obj) and not isinstance( + obj, type + ), "dump_dataclass() requires an instance of a dataclass." + ret = {"_target_": _convert_target_to_string(type(obj))} + for f in dataclasses.fields(obj): + v = getattr(obj, f.name) + if dataclasses.is_dataclass(v): + v = dump_dataclass(v) + if isinstance(v, (list, tuple)): + v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v] + ret[f.name] = v + return ret + + +def instantiate(cfg): + """ + Recursively instantiate objects defined in dictionaries by + "_target_" and arguments. + + Args: + cfg: a dict-like object with "_target_" that defines the caller, and + other keys that define the arguments + + Returns: + object instantiated by cfg + """ + from omegaconf import ListConfig + + if isinstance(cfg, ListConfig): + lst = [instantiate(x) for x in cfg] + return ListConfig(lst, flags={"allow_objects": True}) + if isinstance(cfg, list): + # Specialize for list, because many classes take + # list[objects] as arguments, such as ResNet, DatasetMapper + return [instantiate(x) for x in cfg] + + if isinstance(cfg, abc.Mapping) and "_target_" in cfg: + # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all, + # but faster: https://github.com/facebookresearch/hydra/issues/1200 + cfg = {k: instantiate(v) for k, v in cfg.items()} + cls = cfg.pop("_target_") + cls = instantiate(cls) + + if isinstance(cls, str): + cls_name = cls + cls = locate(cls_name) + assert cls is not None, cls_name + else: + try: + cls_name = cls.__module__ + "." 
+ cls.__qualname__ + except AttributeError: + # target could be anything, so the above could fail + cls_name = str(cls) + assert callable(cls), f"_target_ {cls} does not define a callable object" + try: + return cls(**cfg) + except TypeError: + logger = logging.getLogger(__name__) + logger.error(f"Error when instantiating {cls_name}!") + raise + return cfg # return as-is if we don't know what to do + + +class LazyCall: + """ + Wrap a callable so that when it's called, the call will not be executed, + but returns a dict that describes the call. + + A LazyCall object has to be called with only keyword arguments. Positional + arguments are not yet supported. + + Examples: + :: + layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32) + layer_cfg.out_channels = 64 + layer = instantiate(layer_cfg) + """ + + def __init__(self, target): + if not (callable(target) or isinstance(target, (str, abc.Mapping))): + raise TypeError( + f"target of LazyCall must be a callable or define a callable! Got {target}" + ) + self._target = target + + def __call__(self, **kwargs): + kwargs["_target_"] = self._target + return DictConfig(content=kwargs, flags={"allow_objects": True}) diff --git a/src/sts/detectron2/data/__init__.py b/src/sts/detectron2/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..21c83f8cbd7a9388b452372f0444e78a54a33495 --- /dev/null +++ b/src/sts/detectron2/data/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from . import transforms # isort:skip + +from .build import ( + build_batch_data_loader, + build_detection_test_loader, + build_detection_train_loader, + get_detection_dataset_dicts, + load_proposals_into_dataset, + print_instances_class_histogram, +) +from .catalog import DatasetCatalog, MetadataCatalog, Metadata +from .common import DatasetFromList, MapDataset +from .dataset_mapper import DatasetMapper + +# ensure the builtin datasets are registered +from .
import datasets, samplers # isort:skip + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/detectron2/data/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80c77f76e53e562c21b6058db851457b8dbf22f2 Binary files /dev/null and b/src/sts/detectron2/data/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/__pycache__/build.cpython-38.pyc b/src/sts/detectron2/data/__pycache__/build.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad032bf640d7dad399c1cc278d926d874a813256 Binary files /dev/null and b/src/sts/detectron2/data/__pycache__/build.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/__pycache__/catalog.cpython-38.pyc b/src/sts/detectron2/data/__pycache__/catalog.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff42dcd4f30fc8c53df9263f76c0aed5937efe68 Binary files /dev/null and b/src/sts/detectron2/data/__pycache__/catalog.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/__pycache__/common.cpython-38.pyc b/src/sts/detectron2/data/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aecfa6f98a68a90ab9ec67abc609965f5876fffd Binary files /dev/null and b/src/sts/detectron2/data/__pycache__/common.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/__pycache__/dataset_mapper.cpython-38.pyc b/src/sts/detectron2/data/__pycache__/dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f6f3bb85f5232bbbeb8b8b8532542d49d33635f Binary files /dev/null and b/src/sts/detectron2/data/__pycache__/dataset_mapper.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/__pycache__/detection_utils.cpython-38.pyc b/src/sts/detectron2/data/__pycache__/detection_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..85562b9746bcfd61a300f7e14929a968b58b0faa Binary files /dev/null and b/src/sts/detectron2/data/__pycache__/detection_utils.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/build.py b/src/sts/detectron2/data/build.py new file mode 100644 index 0000000000000000000000000000000000000000..14dc1e769baf75352165fe00ff363023bdd10518 --- /dev/null +++ b/src/sts/detectron2/data/build.py @@ -0,0 +1,472 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import logging +import numpy as np +import operator +import pickle +import torch.utils.data +from tabulate import tabulate +from termcolor import colored + +from detectron2.config import configurable +from detectron2.structures import BoxMode +from detectron2.utils.comm import get_world_size +from detectron2.utils.env import seed_all_rng +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import _log_api_usage, log_first_n + +from .catalog import DatasetCatalog, MetadataCatalog +from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset +from .dataset_mapper import DatasetMapper +from .detection_utils import check_metadata_consistency +from .samplers import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler + +""" +This file contains the default logic to build a dataloader for training or testing. 
+""" + +__all__ = [ + "build_batch_data_loader", + "build_detection_train_loader", + "build_detection_test_loader", + "get_detection_dataset_dicts", + "load_proposals_into_dataset", + "print_instances_class_histogram", +] + + +def filter_images_with_only_crowd_annotations(dataset_dicts): + """ + Filter out images with none annotations or only crowd annotations + (i.e., images without non-crowd annotations). + A common training-time preprocessing on COCO dataset. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format, but filtered. + """ + num_before = len(dataset_dicts) + + def valid(anns): + for ann in anns: + if ann.get("iscrowd", 0) == 0: + return True + return False + + dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with no usable annotations. {} images left.".format( + num_before - num_after, num_after + ) + ) + return dataset_dicts + + +def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image): + """ + Filter out images with too few number of keypoints. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format as dataset_dicts, but filtered. + """ + num_before = len(dataset_dicts) + + def visible_keypoints_in_image(dic): + # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility + annotations = dic["annotations"] + return sum( + (np.array(ann["keypoints"][2::3]) > 0).sum() + for ann in annotations + if "keypoints" in ann + ) + + dataset_dicts = [ + x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image + ] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with fewer than {} keypoints.".format( + num_before - num_after, min_keypoints_per_image + ) + ) + return dataset_dicts + + +def load_proposals_into_dataset(dataset_dicts, proposal_file): + """ + Load precomputed object proposals into the dataset. + + The proposal file should be a pickled dict with the following keys: + + - "ids": list[int] or list[str], the image ids + - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id + - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores + corresponding to the boxes. + - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + proposal_file (str): file path of pre-computed proposals, in pkl format. + + Returns: + list[dict]: the same format as dataset_dicts, but added proposal field. + """ + logger = logging.getLogger(__name__) + logger.info("Loading proposals from: {}".format(proposal_file)) + + with PathManager.open(proposal_file, "rb") as f: + proposals = pickle.load(f, encoding="latin1") + + # Rename the key names in D1 proposal files + rename_keys = {"indexes": "ids", "scores": "objectness_logits"} + for key in rename_keys: + if key in proposals: + proposals[rename_keys[key]] = proposals.pop(key) + + # Fetch the indexes of all proposals that are in the dataset + # Convert image_id to str since they could be int. 
+ img_ids = set({str(record["image_id"]) for record in dataset_dicts}) + id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids} + + # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' + bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS + + for record in dataset_dicts: + # Get the index of the proposal + i = id_to_index[str(record["image_id"])] + + boxes = proposals["boxes"][i] + objectness_logits = proposals["objectness_logits"][i] + # Sort the proposals in descending order of the scores + inds = objectness_logits.argsort()[::-1] + record["proposal_boxes"] = boxes[inds] + record["proposal_objectness_logits"] = objectness_logits[inds] + record["proposal_bbox_mode"] = bbox_mode + + return dataset_dicts + + +def print_instances_class_histogram(dataset_dicts, class_names): + """ + Args: + dataset_dicts (list[dict]): list of dataset dicts. + class_names (list[str]): list of class names (zero-indexed). + """ + num_classes = len(class_names) + hist_bins = np.arange(num_classes + 1) + histogram = np.zeros((num_classes,), dtype=np.int) + for entry in dataset_dicts: + annos = entry["annotations"] + classes = np.asarray( + [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=np.int + ) + if len(classes): + assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}" + assert ( + classes.max() < num_classes + ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes" + histogram += np.histogram(classes, bins=hist_bins)[0] + + N_COLS = min(6, len(class_names) * 2) + + def short_name(x): + # make long class names shorter. useful for lvis + if len(x) > 13: + return x[:11] + ".." + return x + + data = list( + itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)]) + ) + total_num_instances = sum(data[1::2]) + data.extend([None] * (N_COLS - (len(data) % N_COLS))) + if num_classes > 1: + data.extend(["total", total_num_instances]) + data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + data, + headers=["category", "#instances"] * (N_COLS // 2), + tablefmt="pipe", + numalign="left", + stralign="center", + ) + log_first_n( + logging.INFO, + "Distribution of instances among all {} categories:\n".format(num_classes) + + colored(table, "cyan"), + key="message", + ) + + +def get_detection_dataset_dicts(names, filter_empty=True, min_keypoints=0, proposal_files=None): + """ + Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation. + + Args: + names (str or list[str]): a dataset name or a list of dataset names + filter_empty (bool): whether to filter out images without instance annotations + min_keypoints (int): filter out images with fewer keypoints than + `min_keypoints`. Set to 0 to do nothing. + proposal_files (list[str]): if given, a list of object proposal files + that match each dataset in `names`. + + Returns: + list[dict]: a list of dicts following the standard dataset dict format. 
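    Examples (a sketch; ``"my_dataset"`` is a hypothetical name that must already
    be registered in :class:`DatasetCatalog`):
    ::
        dicts = get_detection_dataset_dicts("my_dataset", filter_empty=True)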
+ """ + if isinstance(names, str): + names = [names] + assert len(names), names + dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names] + for dataset_name, dicts in zip(names, dataset_dicts): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + + if proposal_files is not None: + assert len(names) == len(proposal_files) + # load precomputed proposals from proposal files + dataset_dicts = [ + load_proposals_into_dataset(dataset_i_dicts, proposal_file) + for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files) + ] + + dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) + + has_instances = "annotations" in dataset_dicts[0] + if filter_empty and has_instances: + dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) + if min_keypoints > 0 and has_instances: + dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) + + if has_instances: + try: + class_names = MetadataCatalog.get(names[0]).thing_classes + check_metadata_consistency("thing_classes", names) + print_instances_class_histogram(dataset_dicts, class_names) + except AttributeError: # class names are not available for this dataset + pass + + assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names)) + return dataset_dicts + + +def build_batch_data_loader( + dataset, sampler, total_batch_size, *, aspect_ratio_grouping=False, num_workers=0 +): + """ + Build a batched dataloader for training. + + Args: + dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed. + sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices + total_batch_size, aspect_ratio_grouping, num_workers): see + :func:`build_detection_train_loader`. + + Returns: + iterable[list]. Length of each list is the batch size of the current + GPU. Each element in the list comes from the dataset. + """ + world_size = get_world_size() + assert ( + total_batch_size > 0 and total_batch_size % world_size == 0 + ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format( + total_batch_size, world_size + ) + + batch_size = total_batch_size // world_size + if aspect_ratio_grouping: + data_loader = torch.utils.data.DataLoader( + dataset, + sampler=sampler, + num_workers=num_workers, + batch_sampler=None, + collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements + worker_init_fn=worker_init_reset_seed, + ) # yield individual mapped dict + return AspectRatioGroupedDataset(data_loader, batch_size) + else: + batch_sampler = torch.utils.data.sampler.BatchSampler( + sampler, batch_size, drop_last=True + ) # drop_last so the batch always have the same size + return torch.utils.data.DataLoader( + dataset, + num_workers=num_workers, + batch_sampler=batch_sampler, + collate_fn=trivial_batch_collator, + worker_init_fn=worker_init_reset_seed, + ) + + +def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None): + if dataset is None: + dataset = get_detection_dataset_dicts( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON + else 0, + proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + _log_api_usage("dataset." 
+ cfg.DATASETS.TRAIN[0]) + + if mapper is None: + mapper = DatasetMapper(cfg, True) + + if sampler is None: + sampler_name = cfg.DATALOADER.SAMPLER_TRAIN + logger = logging.getLogger(__name__) + logger.info("Using training sampler {}".format(sampler_name)) + if sampler_name == "TrainingSampler": + sampler = TrainingSampler(len(dataset)) + elif sampler_name == "RepeatFactorTrainingSampler": + repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( + dataset, cfg.DATALOADER.REPEAT_THRESHOLD + ) + sampler = RepeatFactorTrainingSampler(repeat_factors) + else: + raise ValueError("Unknown training sampler: {}".format(sampler_name)) + + return { + "dataset": dataset, + "sampler": sampler, + "mapper": mapper, + "total_batch_size": cfg.SOLVER.IMS_PER_BATCH, + "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING, + "num_workers": cfg.DATALOADER.NUM_WORKERS, + } + + +# TODO can allow dataset as an iterable or IterableDataset to make this function more general +@configurable(from_config=_train_loader_from_config) +def build_detection_train_loader( + dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0 +): + """ + Build a dataloader for object detection with some default features. + This interface is experimental. + + Args: + dataset (list or torch.utils.data.Dataset): a list of dataset dicts, + or a map-style pytorch dataset. They can be obtained by using + :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. + mapper (callable): a callable which takes a sample (dict) from dataset and + returns the format to be consumed by the model. + When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``. + sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces + indices to be applied on ``dataset``. Default to :class:`TrainingSampler`, + which coordinates an infinite random shuffle sequence across all workers. + total_batch_size (int): total batch size across all workers. Batching + simply puts data into a list. + aspect_ratio_grouping (bool): whether to group images with similar + aspect ratio for efficiency. When enabled, it requires each + element in dataset be a dict with keys "width" and "height". + num_workers (int): number of parallel data loading workers + + Returns: + torch.utils.data.DataLoader: + a dataloader. Each output from it is a ``list[mapped_element]`` of length + ``total_batch_size / num_workers``, where ``mapped_element`` is produced + by the ``mapper``. + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: + dataset = MapDataset(dataset, mapper) + if sampler is None: + sampler = TrainingSampler(len(dataset)) + assert isinstance(sampler, torch.utils.data.sampler.Sampler) + return build_batch_data_loader( + dataset, + sampler, + total_batch_size, + aspect_ratio_grouping=aspect_ratio_grouping, + num_workers=num_workers, + ) + + +def _test_loader_from_config(cfg, dataset_name, mapper=None): + """ + Uses the given `dataset_name` argument (instead of the names in cfg), because the + standard practice is to evaluate each test set individually (not combining them). 
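    For example, ``build_detection_test_loader(cfg, "my_test")`` (with ``"my_test"``
    a hypothetical registered split) builds a loader for that single split only.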
+ """ + dataset = get_detection_dataset_dicts( + [dataset_name], + filter_empty=False, + proposal_files=[ + cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] + ] + if cfg.MODEL.LOAD_PROPOSALS + else None, + ) + if mapper is None: + mapper = DatasetMapper(cfg, False) + return {"dataset": dataset, "mapper": mapper, "num_workers": cfg.DATALOADER.NUM_WORKERS} + + +@configurable(from_config=_test_loader_from_config) +def build_detection_test_loader(dataset, *, mapper, sampler=None, num_workers=0): + """ + Similar to `build_detection_train_loader`, but uses a batch size of 1, + and :class:`InferenceSampler`. This sampler coordinates all workers to + produce the exact set of all samples. + This interface is experimental. + + Args: + dataset (list or torch.utils.data.Dataset): a list of dataset dicts, + or a map-style pytorch dataset. They can be obtained by using + :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. + mapper (callable): a callable which takes a sample (dict) from dataset + and returns the format to be consumed by the model. + When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``. + sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces + indices to be applied on ``dataset``. Default to :class:`InferenceSampler`, + which splits the dataset across all workers. + num_workers (int): number of parallel data loading workers + + Returns: + DataLoader: a torch DataLoader, that loads the given detection + dataset, with test-time transformation and batching. + + Examples: + :: + data_loader = build_detection_test_loader( + DatasetRegistry.get("my_test"), + mapper=DatasetMapper(...)) + + # or, instantiate with a CfgNode: + data_loader = build_detection_test_loader(cfg, "my_test") + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: + dataset = MapDataset(dataset, mapper) + if sampler is None: + sampler = InferenceSampler(len(dataset)) + # Always use 1 image per worker during inference since this is the + # standard when reporting inference time in papers. + batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) + data_loader = torch.utils.data.DataLoader( + dataset, + num_workers=num_workers, + batch_sampler=batch_sampler, + collate_fn=trivial_batch_collator, + ) + return data_loader + + +def trivial_batch_collator(batch): + """ + A batch collator that does nothing. + """ + return batch + + +def worker_init_reset_seed(worker_id): + initial_seed = torch.initial_seed() % 2 ** 31 + seed_all_rng(initial_seed + worker_id) diff --git a/src/sts/detectron2/data/catalog.py b/src/sts/detectron2/data/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..45c110c19508f23921b9033cdaf0aa8056f0c125 --- /dev/null +++ b/src/sts/detectron2/data/catalog.py @@ -0,0 +1,236 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import types +from collections import UserDict +from typing import List + +from detectron2.utils.logger import log_first_n + +__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"] + + +class _DatasetCatalog(UserDict): + """ + A global dictionary that stores information about the datasets and how to obtain them. + + It contains a mapping from strings + (which are names that identify a dataset, e.g. "coco_2014_train") + to a function which parses the dataset and returns the samples in the + format of `list[dict]`. 
+ + The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details) + if used with the data loader functionalities in `data/build.py,data/detection_transform.py`. + + The purpose of having this catalog is to make it easy to choose + different datasets, by just using the strings in the config. + """ + + def register(self, name, func): + """ + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + func (callable): a callable which takes no arguments and returns a list of dicts. + It must return the same results if called multiple times. + """ + assert callable(func), "You must register a function with `DatasetCatalog.register`!" + assert name not in self, "Dataset '{}' is already registered!".format(name) + self[name] = func + + def get(self, name): + """ + Call the registered function and return its results. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + + Returns: + list[dict]: dataset annotations. + """ + try: + f = self[name] + except KeyError as e: + raise KeyError( + "Dataset '{}' is not registered! Available datasets are: {}".format( + name, ", ".join(list(self.keys())) + ) + ) from e + return f() + + def list(self) -> List[str]: + """ + List all registered datasets. + + Returns: + list[str] + """ + return list(self.keys()) + + def remove(self, name): + """ + Alias of ``pop``. + """ + self.pop(name) + + def __str__(self): + return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys())) + + __repr__ = __str__ + + +DatasetCatalog = _DatasetCatalog() +DatasetCatalog.__doc__ = ( + _DatasetCatalog.__doc__ + + """ + .. automethod:: detectron2.data.catalog.DatasetCatalog.register + .. automethod:: detectron2.data.catalog.DatasetCatalog.get +""" +) + + +class Metadata(types.SimpleNamespace): + """ + A class that supports simple attribute setter/getter. + It is intended for storing metadata of a dataset and make it accessible globally. + + Examples: + :: + # somewhere when you load the data: + MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"] + + # somewhere when you print statistics or visualize: + classes = MetadataCatalog.get("mydataset").thing_classes + """ + + # the name of the dataset + # set default to N/A so that `self.name` in the errors will not trigger getattr again + name: str = "N/A" + + _RENAMED = { + "class_names": "thing_classes", + "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id", + "stuff_class_names": "stuff_classes", + } + + def __getattr__(self, key): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + return getattr(self, self._RENAMED[key]) + + # "name" exists in every metadata + if len(self.__dict__) > 1: + raise AttributeError( + "Attribute '{}' does not exist in the metadata of dataset '{}'. Available " + "keys are {}.".format(key, self.name, str(self.__dict__.keys())) + ) + else: + raise AttributeError( + f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': " + "metadata is empty." 
+ ) + + def __setattr__(self, key, val): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + setattr(self, self._RENAMED[key], val) + + # Ensure that metadata of the same name stays consistent + try: + oldval = getattr(self, key) + assert oldval == val, ( + "Attribute '{}' in the metadata of '{}' cannot be set " + "to a different value!\n{} != {}".format(key, self.name, oldval, val) + ) + except AttributeError: + super().__setattr__(key, val) + + def as_dict(self): + """ + Returns all the metadata as a dict. + Note that modifications to the returned dict will not reflect on the Metadata object. + """ + return copy.copy(self.__dict__) + + def set(self, **kwargs): + """ + Set multiple metadata with kwargs. + """ + for k, v in kwargs.items(): + setattr(self, k, v) + return self + + def get(self, key, default=None): + """ + Access an attribute and return its value if exists. + Otherwise return default. + """ + try: + return getattr(self, key) + except AttributeError: + return default + + +class _MetadataCatalog(UserDict): + """ + MetadataCatalog is a global dictionary that provides access to + :class:`Metadata` of a given dataset. + + The metadata associated with a certain name is a singleton: once created, the + metadata will stay alive and will be returned by future calls to ``get(name)``. + + It's like global variables, so don't abuse it. + It's meant for storing knowledge that's constant and shared across the execution + of the program, e.g.: the class names in COCO. + """ + + def get(self, name): + """ + Args: + name (str): name of a dataset (e.g. coco_2014_train). + + Returns: + Metadata: The :class:`Metadata` instance associated with this name, + or create an empty one if none is available. + """ + assert len(name) + r = super().get(name, None) + if r is None: + r = self[name] = Metadata(name=name) + return r + + def list(self): + """ + List all registered metadata. + + Returns: + list[str]: keys (names of datasets) of all registered metadata + """ + return list(self.keys()) + + def remove(self, name): + """ + Alias of ``pop``. + """ + self.pop(name) + + def __str__(self): + return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys())) + + __repr__ = __str__ + + +MetadataCatalog = _MetadataCatalog() +MetadataCatalog.__doc__ = ( + _MetadataCatalog.__doc__ + + """ + .. automethod:: detectron2.data.catalog.MetadataCatalog.get +""" +) diff --git a/src/sts/detectron2/data/common.py b/src/sts/detectron2/data/common.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7d97c2860b63d7ff4686f2e86f00fe6e181a35 --- /dev/null +++ b/src/sts/detectron2/data/common.py @@ -0,0 +1,186 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import logging +import numpy as np +import pickle +import random +import torch.utils.data as data +from torch.utils.data.sampler import Sampler + +from detectron2.utils.serialize import PicklableWrapper + +__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"] + + +class MapDataset(data.Dataset): + """ + Map a function over the elements in a dataset. + + Args: + dataset: a dataset where map function is applied. + map_func: a callable which maps the element in dataset. map_func is + responsible for error handling, when error happens, it needs to + return None so the MapDataset will randomly use other + elements from the dataset. 
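    Examples (a sketch; ``dataset_dicts`` and ``mapper`` are hypothetical placeholders
    for a list of dataset dicts and a mapping callable):
    ::
        dataset = MapDataset(DatasetFromList(dataset_dicts, copy=False), mapper)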
+ """ + + def __init__(self, dataset, map_func): + self._dataset = dataset + self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work + + self._rng = random.Random(42) + self._fallback_candidates = set(range(len(dataset))) + + def __len__(self): + return len(self._dataset) + + def __getitem__(self, idx): + retry_count = 0 + cur_idx = int(idx) + + while True: + data = self._map_func(self._dataset[cur_idx]) + if data is not None: + self._fallback_candidates.add(cur_idx) + return data + + # _map_func fails for this idx, use a random new index from the pool + retry_count += 1 + self._fallback_candidates.discard(cur_idx) + cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] + + if retry_count >= 3: + logger = logging.getLogger(__name__) + logger.warning( + "Failed to apply `_map_func` for idx: {}, retry count: {}".format( + idx, retry_count + ) + ) + + +class DatasetFromList(data.Dataset): + """ + Wrap a list to a torch Dataset. It produces elements of the list as data. + """ + + def __init__(self, lst: list, copy: bool = True, serialize: bool = True): + """ + Args: + lst (list): a list which contains elements to produce. + copy (bool): whether to deepcopy the element when producing it, + so that the result can be modified in place without affecting the + source in the list. + serialize (bool): whether to hold memory using serialized objects, when + enabled, data loader workers can use shared RAM from master + process instead of making a copy. + """ + self._lst = lst + self._copy = copy + self._serialize = serialize + + def _serialize(data): + buffer = pickle.dumps(data, protocol=-1) + return np.frombuffer(buffer, dtype=np.uint8) + + if self._serialize: + logger = logging.getLogger(__name__) + logger.info( + "Serializing {} elements to byte tensors and concatenating them all ...".format( + len(self._lst) + ) + ) + self._lst = [_serialize(x) for x in self._lst] + self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64) + self._addr = np.cumsum(self._addr) + self._lst = np.concatenate(self._lst) + logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2)) + + def __len__(self): + if self._serialize: + return len(self._addr) + else: + return len(self._lst) + + def __getitem__(self, idx): + if self._serialize: + start_addr = 0 if idx == 0 else self._addr[idx - 1].item() + end_addr = self._addr[idx].item() + bytes = memoryview(self._lst[start_addr:end_addr]) + return pickle.loads(bytes) + elif self._copy: + return copy.deepcopy(self._lst[idx]) + else: + return self._lst[idx] + + +class ToIterableDataset(data.IterableDataset): + """ + Convert an old indices-based (also called map-style) dataset + to an iterable-style dataset. + """ + + def __init__(self, dataset, sampler): + """ + Args: + dataset (torch.utils.data.Dataset): an old-style dataset with ``__getitem__`` + sampler (torch.utils.data.sampler.Sampler): a cheap iterable that produces indices + to be applied on ``dataset``. + """ + assert not isinstance(dataset, data.IterableDataset), dataset + assert isinstance(sampler, Sampler), sampler + self.dataset = dataset + self.sampler = sampler + + def __iter__(self): + worker_info = data.get_worker_info() + if worker_info is None or worker_info.num_workers == 1: + for idx in self.sampler: + yield self.dataset[idx] + else: + # With map-style dataset, `DataLoader(dataset, sampler)` runs the + # sampler in main process only. 
But `DataLoader(ToIterableDataset(dataset, sampler))` + # will run sampler in every of the N worker and only keep 1/N of the ids on each + # worker. The assumption is that sampler is cheap to iterate and it's fine to discard + # ids in workers. + for idx in itertools.islice( + self.sampler, worker_info.id, None, worker_info.num_workers + ): + yield self.dataset[idx] + + +class AspectRatioGroupedDataset(data.IterableDataset): + """ + Batch data that have similar aspect ratio together. + In this implementation, images whose aspect ratio < (or >) 1 will + be batched together. + This improves training speed because the images then need less padding + to form a batch. + + It assumes the underlying dataset produces dicts with "width" and "height" keys. + It will then produce a list of original dicts with length = batch_size, + all with similar aspect ratios. + """ + + def __init__(self, dataset, batch_size): + """ + Args: + dataset: an iterable. Each element must be a dict with keys + "width" and "height", which will be used to batch data. + batch_size (int): + """ + self.dataset = dataset + self.batch_size = batch_size + self._buckets = [[] for _ in range(2)] + # Hard-coded two aspect ratio groups: w > h and w < h. + # Can add support for more aspect ratio groups, but doesn't seem useful + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + bucket_id = 0 if w > h else 1 + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_size: + yield bucket[:] + del bucket[:] diff --git a/src/sts/detectron2/data/dataset_mapper.py b/src/sts/detectron2/data/dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..55631fc6ab027a0a6d5d6b3c0b902a09a4a85efc --- /dev/null +++ b/src/sts/detectron2/data/dataset_mapper.py @@ -0,0 +1,186 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import numpy as np +from typing import List, Optional, Union +import torch + +from detectron2.config import configurable + +from . import detection_utils as utils +from . import transforms as T + +""" +This file contains the default mapping that's applied to "dataset dicts". +""" + +__all__ = ["DatasetMapper"] + + +class DatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by the model. + + This is the default callable to be used to map your dataset dict into training data. + You may need to follow it to implement your own one for customized logic, + such as a different way to read or transform images. + See :doc:`/tutorials/data_loading` for details. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies cropping/geometric transforms to the image and annotations + 3. Prepare data and annotations to Tensor and :class:`Instances` + """ + + @configurable + def __init__( + self, + is_train: bool, + *, + augmentations: List[Union[T.Augmentation, T.Transform]], + image_format: str, + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + ): + """ + NOTE: this interface is experimental. + + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. + """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = T.AugmentationList(augmentations) + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + # fmt: on + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON + else: + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "recompute_boxes": recompute_boxes, + } + + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + # USER: Write your own image loading if it's not from a file + image = utils.read_image(dataset_dict["file_name"], format=self.image_format) + utils.check_image_size(dataset_dict, image) + + # USER: Remove if you don't do semantic/panoptic segmentation. + if "sem_seg_file_name" in dataset_dict: + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) + else: + sem_seg_gt = None + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + transforms = self.augmentations(aug_input) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. 
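        # (Note: the transpose below converts the HxWxC numpy image into a contiguous
        # CxHxW tensor, which is the channel layout detectron2 models consume.)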
+ dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + # USER: Remove if you don't use pre-computed proposals. + # Most users would not need this feature. + if self.proposal_topk is not None: + utils.transform_proposals( + dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk + ) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + dataset_dict.pop("sem_seg_file_name", None) + return dataset_dict + if "annotations" in dataset_dict: + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. + if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + dataset_dict["instances"] = utils.filter_empty_instances(instances) + return dataset_dict diff --git a/src/sts/detectron2/data/datasets/README.md b/src/sts/detectron2/data/datasets/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9fb3e4f7afec17137c95c78be6ef06d520ec8032 --- /dev/null +++ b/src/sts/detectron2/data/datasets/README.md @@ -0,0 +1,9 @@ + + +### Common Datasets + +The dataset implemented here do not need to load the data into the final format. +It should provide the minimal data structure needed to use the dataset, so it can be very efficient. + +For example, for an image dataset, just provide the file names and labels, but don't read the images. +Let the downstream decide how to read. diff --git a/src/sts/detectron2/data/datasets/__init__.py b/src/sts/detectron2/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dbd92e8e2e1295d73e28f1eb2ed2368f368849a3 --- /dev/null +++ b/src/sts/detectron2/data/datasets/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .coco import load_coco_json, load_sem_seg, register_coco_instances +from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated +from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta +from .pascal_voc import load_voc_instances, register_pascal_voc +from . 
import builtin as _builtin # ensure the builtin datasets are registered + + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/detectron2/data/datasets/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60f7cb9fa7d133d1f9c0ba2022571c46e2b8be53 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/builtin.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/builtin.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1ede40c6b90658e5fb3447e76d3c3fb00bc1c3f Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/builtin.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/builtin_meta.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/builtin_meta.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a969e74674dd0ddcd0d8418c13201a2c7ed740ec Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/builtin_meta.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/cityscapes.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/cityscapes.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1eb66424d965f35b404e3c86262008152f3e87f9 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/cityscapes.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/cityscapes_panoptic.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/cityscapes_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc6a3a4adced133725e679bf76d8e082b14c0bf1 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/cityscapes_panoptic.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/coco.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/coco.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..965a5bf9027802ac9ce29577185d00c52a03b097 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/coco.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/coco_panoptic.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/coco_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1ab11f83cf0f4b186da6c9c7e81b822480b835a Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/coco_panoptic.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/lvis.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/lvis.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52ab714b18206d883098e848257435952d7845f1 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/lvis.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7e86084c1b2cc94410e994aed05cecbbec312f2 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/lvis_v0_5_categories.cpython-38.pyc differ diff --git 
a/src/sts/detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfcc5a4631edbadea4651e67a964f4da2aecf851 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/lvis_v1_categories.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/__pycache__/pascal_voc.cpython-38.pyc b/src/sts/detectron2/data/datasets/__pycache__/pascal_voc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf7e0839d536ff1cb02fe392682839cfa66073b7 Binary files /dev/null and b/src/sts/detectron2/data/datasets/__pycache__/pascal_voc.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/datasets/builtin.py b/src/sts/detectron2/data/datasets/builtin.py new file mode 100644 index 0000000000000000000000000000000000000000..eaacb44cfe65579cc3c466c5c94fb186c100c9a5 --- /dev/null +++ b/src/sts/detectron2/data/datasets/builtin.py @@ -0,0 +1,280 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + + +""" +This file registers pre-defined datasets at hard-coded paths, and their metadata. + +We hard-code metadata for common datasets. This will enable: +1. Consistency check when loading the datasets +2. Use models on these standard datasets directly and run demos, + without having to download the dataset annotations + +We hard-code some paths to the dataset that's assumed to +exist in "./datasets/". + +Users SHOULD NOT use this file to create new dataset / metadata for new dataset. +To add new dataset, refer to the tutorial "docs/DATASETS.md". +""" + +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog + +from .builtin_meta import ADE20K_SEM_SEG_CATEGORIES, _get_builtin_metadata +from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic +from .cityscapes_panoptic import register_all_cityscapes_panoptic +from .coco import load_sem_seg, register_coco_instances +from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated +from .lvis import get_lvis_instances_meta, register_lvis_instances +from .pascal_voc import register_pascal_voc + +# ==== Predefined datasets and splits for COCO ========== + +_PREDEFINED_SPLITS_COCO = {} +_PREDEFINED_SPLITS_COCO["coco"] = { + "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"), + "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), + "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"), + "coco_2014_minival_100": ("coco/val2014", "coco/annotations/instances_minival2014_100.json"), + "coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/instances_valminusminival2014.json", + ), + "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"), + "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"), + "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"), + "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"), + "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"), + "icdar_2015_train": ("coco/ic15_images", "annotations/icdar_2015.json"), + "icdar_2013_train": ("coco/ic13_images", "annotations/icdar_2013.json"), + "icdar_2017_mlt": ("icdar_2017_mlt", "annotations/icdar_2017_mlt.json"), + "icdar_2017_validation_mlt": ("icdar_2017_validation_mlt", 
"annotations/icdar_2017_validation_mlt.json"), + "icdar_curvesynthtext_train1": ("curve_text/emcs_imgs", "annotations/ecms_v1_maxlen25.json"), + "icdar_curvesynthtext_train2": ("curve_text/syntext_word_eng", "annotations/syntext_word_eng.json"), + "art": ("icdar2019_art_images", "annotations/icdar_2019_art_swints.json"), + "rects": ("icdar2019_rects_images", "annotations/icdar_2019_rects_swints.json"), + "lsvt": ("icdar2019_lsvt_images", "annotations/icdar_2019_lsvt_swints.json"), + "chn_syn": ("chn_syn_images", "annotations/chn_syn.json"), + "totaltext_train": ("totaltext/totaltext_train_images", "totaltext/totaltext_train.json"), + "totaltext_test": ("totaltext/totaltext_test_images", "totaltext/totaltext_test.json"), + "vintext_train": ("fimotext/train_images", "fimotext/train.json"), + "vintext_test": ("fimotext/val_images", "fimotext/valid.json"), + "ctw1500_train": ("train2017", "annotations/instances_train2017.json"), + "ctw1500_test": ("ctwtest_text_image", "annotations/test_ctw1500_maxlen100.json"), +} + +_PREDEFINED_SPLITS_COCO["coco_person"] = { + "keypoints_coco_2014_train": ( + "coco/train2014", + "coco/annotations/person_keypoints_train2014.json", + ), + "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"), + "keypoints_coco_2014_minival": ( + "coco/val2014", + "coco/annotations/person_keypoints_minival2014.json", + ), + "keypoints_coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/person_keypoints_valminusminival2014.json", + ), + "keypoints_coco_2014_minival_100": ( + "coco/val2014", + "coco/annotations/person_keypoints_minival2014_100.json", + ), + "keypoints_coco_2017_train": ( + "coco/train2017", + "coco/annotations/person_keypoints_train2017.json", + ), + "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"), + "keypoints_coco_2017_val_100": ( + "coco/val2017", + "coco/annotations/person_keypoints_val2017_100.json", + ), +} + + +_PREDEFINED_SPLITS_COCO_PANOPTIC = { + "coco_2017_train_panoptic": ( + # This is the original panoptic annotation directory + "coco/panoptic_train2017", + "coco/annotations/panoptic_train2017.json", + # This directory contains semantic annotations that are + # converted from panoptic annotations. + # It is used by PanopticFPN. + # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py + # to create these directories. + "coco/panoptic_stuff_train2017", + ), + "coco_2017_val_panoptic": ( + "coco/panoptic_val2017", + "coco/annotations/panoptic_val2017.json", + "coco/panoptic_stuff_val2017", + ), + "coco_2017_val_100_panoptic": ( + "coco/panoptic_val2017_100", + "coco/annotations/panoptic_val2017_100.json", + "coco/panoptic_stuff_val2017_100", + ), +} + + +def register_all_coco(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + # Assume pre-defined datasets live in `./datasets`. + register_coco_instances( + key, + _get_builtin_metadata(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + for ( + prefix, + (panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): + prefix_instances = prefix[: -len("_panoptic")] + instances_meta = MetadataCatalog.get(prefix_instances) + image_root, instances_json = instances_meta.image_root, instances_meta.json_file + # The "separated" version of COCO panoptic segmentation dataset, + # e.g. 
used by Panoptic FPN + register_coco_panoptic_separated( + prefix, + _get_builtin_metadata("coco_panoptic_separated"), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + os.path.join(root, semantic_root), + instances_json, + ) + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. used by Panoptic-DeepLab + register_coco_panoptic( + prefix, + _get_builtin_metadata("coco_panoptic_standard"), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + instances_json, + ) + + +# ==== Predefined datasets and splits for LVIS ========== + + +_PREDEFINED_SPLITS_LVIS = { + "lvis_v1": { + "lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"), + "lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"), + "lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"), + "lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"), + }, + "lvis_v0.5": { + "lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"), + "lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"), + "lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"), + "lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"), + }, + "lvis_v0.5_cocofied": { + "lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"), + "lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"), + }, +} + + +def register_all_lvis(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + register_lvis_instances( + key, + get_lvis_instances_meta(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +# ==== Predefined splits for raw cityscapes images =========== +_RAW_CITYSCAPES_SPLITS = { + "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train/", "cityscapes/gtFine/train/"), + "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val/", "cityscapes/gtFine/val/"), + "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test/", "cityscapes/gtFine/test/"), +} + + +def register_all_cityscapes(root): + for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): + meta = _get_builtin_metadata("cityscapes") + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + + inst_key = key.format(task="instance_seg") + DatasetCatalog.register( + inst_key, + lambda x=image_dir, y=gt_dir: load_cityscapes_instances( + x, y, from_json=True, to_polygons=True + ), + ) + MetadataCatalog.get(inst_key).set( + image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta + ) + + sem_key = key.format(task="sem_seg") + DatasetCatalog.register( + sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y) + ) + MetadataCatalog.get(sem_key).set( + image_dir=image_dir, + gt_dir=gt_dir, + evaluator_type="cityscapes_sem_seg", + ignore_label=255, + **meta, + ) + + +# ==== Predefined splits for PASCAL VOC =========== +def register_all_pascal_voc(root): + SPLITS = [ + ("voc_2007_trainval", "VOC2007", "trainval"), + ("voc_2007_train", "VOC2007", "train"), + ("voc_2007_val", "VOC2007", "val"), + ("voc_2007_test", "VOC2007", "test"), + ("voc_2012_trainval", "VOC2012", "trainval"), + ("voc_2012_train", "VOC2012", "train"), + ("voc_2012_val", "VOC2012", "val"), + ] + for name, dirname, split in SPLITS: + year = 2007 if "2007" in name else 2012 + register_pascal_voc(name, os.path.join(root, dirname), 
split, year) + MetadataCatalog.get(name).evaluator_type = "pascal_voc" + + +def register_all_ade20k(root): + root = os.path.join(root, "ADEChallengeData2016") + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_detectron2", dirname) + name = f"ade20k_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=ADE20K_SEM_SEG_CATEGORIES[:], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + ) + + +# True for open source; +# Internally at fb, we register them elsewhere +if __name__.endswith(".builtin"): + # Assume pre-defined datasets live in `./datasets`. + _root = os.getenv("DETECTRON2_DATASETS", "datasets") + register_all_coco(_root) + register_all_lvis(_root) + register_all_cityscapes(_root) + register_all_cityscapes_panoptic(_root) + register_all_pascal_voc(_root) + register_all_ade20k(_root) diff --git a/src/sts/detectron2/data/datasets/builtin_meta.py b/src/sts/detectron2/data/datasets/builtin_meta.py new file mode 100644 index 0000000000000000000000000000000000000000..9e0c7a8f52a014b997cd3764805680849d6f45ff --- /dev/null +++ b/src/sts/detectron2/data/datasets/builtin_meta.py @@ -0,0 +1,350 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +Note: +For your custom dataset, there is no need to hard-code metadata anywhere in the code. +For example, for COCO-format dataset, metadata will be obtained automatically +when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways +during loading. + +However, we hard-coded metadata for a few common dataset here. +The only goal is to allow users who don't have these dataset to use pre-trained models. +Users don't have to download a COCO json (which contains metadata), in order to visualize a +COCO model (with correct class names and colors). 
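For a custom COCO-format dataset, for example, a single call such as
``register_coco_instances("my_data", {}, "annotations.json", "images/")`` (hypothetical
name and paths) is enough; the class names are then read from the JSON itself.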
+""" + + +# All coco categories, together with their nice-looking visualization colors +# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "text"}, + # {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + # {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + # {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + # {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + # {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + # {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + # {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + # {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + # {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + # {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + # {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + # {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + # {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + # {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + # {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + # {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + # {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + # {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + # {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + # {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + # {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + # {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + # {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + # {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + # {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + # {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + # {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + # {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + # {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + # {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + # {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + # {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + # {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + # {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + # {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + # {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + # {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + # {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + # {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + # {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + # {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + # {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + # {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + # {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + # {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + # 
{"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + # {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + # {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + # {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + # {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + # {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + # {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + # {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + # {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + # {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + # {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + # {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + # {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + # {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + # {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + # {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + # {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + # {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + # {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + # {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + # {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + # {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + # {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + # {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + # {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + # {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + # {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + # {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + # {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + # {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + # {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + # {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + # {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + # {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + # {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + # {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + # {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + # {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + # {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + # {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + # {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + # {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + # {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + # {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + # {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + # {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + # {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + # {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + # {"color": 
[193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + # {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + # {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"}, + # {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + # {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + # {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + # {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + # {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + # {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + # {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + # {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + # {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + # {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + # {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + # {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + # {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + # {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + # {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + # {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + # {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + # {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + # {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + # {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + # {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + # {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + # {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + # {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + # {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + # {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + # {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + # {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + # {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + # {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + # {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + # {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + # {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + # {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + # {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + # {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +# fmt: off +COCO_PERSON_KEYPOINT_NAMES = ( + "nose", + "left_eye", "right_eye", + "left_ear", "right_ear", + "left_shoulder", "right_shoulder", + "left_elbow", "right_elbow", + "left_wrist", "right_wrist", + "left_hip", "right_hip", + "left_knee", "right_knee", + "left_ankle", "right_ankle", +) +# fmt: on + +# Pairs of keypoints that should be exchanged under horizontal flipping +COCO_PERSON_KEYPOINT_FLIP_MAP = ( + ("left_eye", "right_eye"), + ("left_ear", "right_ear"), + ("left_shoulder", "right_shoulder"), + 
("left_elbow", "right_elbow"), + ("left_wrist", "right_wrist"), + ("left_hip", "right_hip"), + ("left_knee", "right_knee"), + ("left_ankle", "right_ankle"), +) + +# rules for pairs of keypoints to draw a line between, and the line color to use. +KEYPOINT_CONNECTION_RULES = [ + # face + ("left_ear", "left_eye", (102, 204, 255)), + ("right_ear", "right_eye", (51, 153, 255)), + ("left_eye", "nose", (102, 0, 204)), + ("nose", "right_eye", (51, 102, 255)), + # upper-body + ("left_shoulder", "right_shoulder", (255, 128, 0)), + ("left_shoulder", "left_elbow", (153, 255, 204)), + ("right_shoulder", "right_elbow", (128, 229, 255)), + ("left_elbow", "left_wrist", (153, 255, 153)), + ("right_elbow", "right_wrist", (102, 255, 224)), + # lower-body + ("left_hip", "right_hip", (255, 102, 0)), + ("left_hip", "left_knee", (255, 255, 77)), + ("right_hip", "right_knee", (153, 255, 204)), + ("left_knee", "left_ankle", (191, 255, 128)), + ("right_knee", "right_ankle", (255, 195, 77)), +] + +# All Cityscapes categories, together with their nice-looking visualization colors +# It's from https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py # noqa +CITYSCAPES_CATEGORIES = [ + {"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"}, + {"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"}, + {"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"}, + {"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"}, + {"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"}, + {"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"}, + {"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"}, + {"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"}, + {"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"}, + {"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"}, + {"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"}, + {"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"}, + {"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"}, + {"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"}, + {"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"}, + {"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"}, + {"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"}, + {"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"}, + {"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"}, +] + +# fmt: off +ADE20K_SEM_SEG_CATEGORIES = [ + "wall", "building", "sky", "floor", "tree", "ceiling", "road, route", "bed", "window ", "grass", "cabinet", "sidewalk, pavement", "person", "earth, ground", "door", "table", "mountain, mount", "plant", "curtain", "chair", "car", "water", "painting, picture", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock, stone", "wardrobe, closet, press", "lamp", "tub", "rail", "cushion", "base, pedestal, stand", "box", "column, pillar", "signboard, sign", "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator, icebox", "grandstand, covered stand", "path", "stairs", "runway", 
"case, display case, showcase, vitrine", "pool table, billiard table, snooker table", "pillow", "screen door, screen", "stairway, staircase", "river", "bridge, span", "bookcase", "blind, screen", "coffee table", "toilet, can, commode, crapper, pot, potty, stool, throne", "flower", "book", "hill", "bench", "countertop", "stove", "palm, palm tree", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel, hut, hutch, shack, shanty", "bus", "towel", "light", "truck", "tower", "chandelier", "awning, sunshade, sunblind", "street lamp", "booth", "tv", "plane", "dirt track", "clothes", "pole", "land, ground, soil", "bannister, banister, balustrade, balusters, handrail", "escalator, moving staircase, moving stairway", "ottoman, pouf, pouffe, puff, hassock", "bottle", "buffet, counter, sideboard", "poster, posting, placard, notice, bill, card", "stage", "van", "ship", "fountain", "conveyer belt, conveyor belt, conveyer, conveyor, transporter", "canopy", "washer, automatic washer, washing machine", "plaything, toy", "pool", "stool", "barrel, cask", "basket, handbasket", "falls", "tent", "bag", "minibike, motorbike", "cradle", "oven", "ball", "food, solid food", "step, stair", "tank, storage tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", "traffic light", "tray", "trash can", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass, drinking glass", "clock", "flag", # noqa +] +# After processed by `prepare_ade20k_sem_seg.py`, id 255 means ignore +# fmt: on + + +def _get_coco_instances_meta(): + thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] + # assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous COCO category id to an id in [0, 79] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + "thing_colors": thing_colors, + } + return ret + + +def _get_coco_panoptic_separated_meta(): + """ + Returns metadata for "separated" version of the panoptic segmentation dataset. + """ + stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0] + # assert len(stuff_ids) == 53, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 53], used in models) to ids in the dataset (used for processing results) + # The id 0 is mapped to an extra category "thing". 
+ stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)} + # When converting COCO panoptic annotations to semantic annotations + # We label the "thing" category to 0 + stuff_dataset_id_to_contiguous_id[0] = 0 + + # 54 names for COCO stuff categories (including "things") + stuff_classes = ["things"] + [ + k["name"].replace("-other", "").replace("-merged", "") + for k in COCO_CATEGORIES + if k["isthing"] == 0 + ] + + # NOTE: I randomly picked a color for things + stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0] + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + "stuff_colors": stuff_colors, + } + ret.update(_get_coco_instances_meta()) + return ret + + +def _get_builtin_metadata(dataset_name): + if dataset_name == "coco": + return _get_coco_instances_meta() + if dataset_name == "coco_panoptic_separated": + return _get_coco_panoptic_separated_meta() + elif dataset_name == "coco_panoptic_standard": + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in COCO_CATEGORIES] + thing_colors = [k["color"] for k in COCO_CATEGORIES] + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + stuff_colors = [k["color"] for k in COCO_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. 
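+        # Illustrative example of the two id spaces (actual values come from
+        # COCO_CATEGORIES above): thing id 1 ("person"), the first entry, maps to
+        # contiguous id 0, while a stuff id such as 92 ("banner") maps to its index
+        # in the list.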
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + else: + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + elif dataset_name == "coco_person": + return { + "thing_classes": ["person"], + "keypoint_names": COCO_PERSON_KEYPOINT_NAMES, + "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP, + "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES, + } + elif dataset_name == "cityscapes": + # fmt: off + CITYSCAPES_THING_CLASSES = [ + "person", "rider", "car", "truck", + "bus", "train", "motorcycle", "bicycle", + ] + CITYSCAPES_STUFF_CLASSES = [ + "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", + "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car", + "truck", "bus", "train", "motorcycle", "bicycle", + ] + # fmt: on + return { + "thing_classes": CITYSCAPES_THING_CLASSES, + "stuff_classes": CITYSCAPES_STUFF_CLASSES, + } + raise KeyError("No built-in metadata for dataset {}".format(dataset_name)) diff --git a/src/sts/detectron2/data/datasets/cityscapes.py b/src/sts/detectron2/data/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..1e84a5bdb3d4e410d8eef4b80a5d4c099a180104 --- /dev/null +++ b/src/sts/detectron2/data/datasets/cityscapes.py @@ -0,0 +1,329 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import functools +import json +import logging +import multiprocessing as mp +import numpy as np +import os +from itertools import chain +import pycocotools.mask as mask_util +from PIL import Image + +from detectron2.structures import BoxMode +from detectron2.utils.comm import get_world_size +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import setup_logger + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + + +logger = logging.getLogger(__name__) + + +def _get_cityscapes_files(image_dir, gt_dir): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + for city in cities: + city_img_dir = os.path.join(image_dir, city) + city_gt_dir = os.path.join(gt_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = basename[: -len(suffix)] + + instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") + label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") + json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") + + files.append((image_file, instance_file, label_file, json_file)) + assert len(files), "No images found in {}".format(image_dir) + for f in files[0]: + assert PathManager.isfile(f), f + return files + + +def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". + from_json (bool): whether to read annotations from the raw json file or the png files. 
+ to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + if from_json: + assert to_polygons, ( + "Cityscapes's json annotations are in polygon format. " + "Converting to mask format is not supported now." + ) + files = _get_cityscapes_files(image_dir, gt_dir) + + logger.info("Preprocessing cityscapes annotations ...") + # This is still not fast: all workers will execute duplicate works and will + # take up to 10m on a 8GPU server. + pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) + + ret = pool.map( + functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), + files, + ) + logger.info("Loaded {} images from {}".format(len(ret), image_dir)) + + # Map cityscape ids to contiguous ids + from cityscapesscripts.helpers.labels import labels + + labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] + dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} + for dict_per_image in ret: + for anno in dict_per_image["annotations"]: + anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] + return ret + + +def load_cityscapes_semantic(image_dir, gt_dir): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". + + Returns: + list[dict]: a list of dict, each has "file_name" and + "sem_seg_file_name". + """ + ret = [] + # gt_dir is small and contain many small files. make sense to fetch to local first + gt_dir = PathManager.get_local_path(gt_dir) + for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir): + label_file = label_file.replace("labelIds", "labelTrainIds") + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret.append( + { + "file_name": image_file, + "sem_seg_file_name": label_file, + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + return ret + + +def _cityscapes_files_to_dict(files, from_json, to_polygons): + """ + Parse cityscapes annotation files to a instance segmentation dataset dict. + + Args: + files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) + from_json (bool): whether to read annotations from the raw json file or the png files. + to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + A dict in Detectron2 Dataset format. + """ + from cityscapesscripts.helpers.labels import id2label, name2label + + image_file, instance_id_file, _, json_file = files + + annos = [] + + if from_json: + from shapely.geometry import MultiPolygon, Polygon + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + + # `polygons_union` contains the union of all valid polygons. + polygons_union = Polygon() + + # CityscapesScripts draw the polygons in sequential order + # and each polygon *overwrites* existing ones. 
See + # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa + # We use reverse order, and each polygon *avoids* early ones. + # This will resolve the ploygon overlaps in the same way as CityscapesScripts. + for obj in jsonobj["objects"][::-1]: + if "deleted" in obj: # cityscapes data format specific + continue + label_name = obj["label"] + + try: + label = name2label[label_name] + except KeyError: + if label_name.endswith("group"): # crowd area + label = name2label[label_name[: -len("group")]] + else: + raise + if label.id < 0: # cityscapes data format + continue + + # Cityscapes's raw annotations uses integer coordinates + # Therefore +0.5 here + poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 + # CityscapesScript uses PIL.ImageDraw.polygon to rasterize + # polygons for evaluation. This function operates in integer space + # and draws each pixel whose center falls into the polygon. + # Therefore it draws a polygon which is 0.5 "fatter" in expectation. + # We therefore dilate the input polygon by 0.5 as our input. + poly = Polygon(poly_coord).buffer(0.5, resolution=4) + + if not label.hasInstances or label.ignoreInEval: + # even if we won't store the polygon it still contributes to overlaps resolution + polygons_union = polygons_union.union(poly) + continue + + # Take non-overlapping part of the polygon + poly_wo_overlaps = poly.difference(polygons_union) + if poly_wo_overlaps.is_empty: + continue + polygons_union = polygons_union.union(poly) + + anno = {} + anno["iscrowd"] = label_name.endswith("group") + anno["category_id"] = label.id + + if isinstance(poly_wo_overlaps, Polygon): + poly_list = [poly_wo_overlaps] + elif isinstance(poly_wo_overlaps, MultiPolygon): + poly_list = poly_wo_overlaps.geoms + else: + raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) + + poly_coord = [] + for poly_el in poly_list: + # COCO API can work only with exterior boundaries now, hence we store only them. + # TODO: store both exterior and interior boundaries once other parts of the + # codebase support holes in polygons. 
+ poly_coord.append(list(chain(*poly_el.exterior.coords))) + anno["segmentation"] = poly_coord + (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds + + anno["bbox"] = (xmin, ymin, xmax, ymax) + anno["bbox_mode"] = BoxMode.XYXY_ABS + + annos.append(anno) + else: + # See also the official annotation parsing scripts at + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa + with PathManager.open(instance_id_file, "rb") as f: + inst_image = np.asarray(Image.open(f), order="F") + # ids < 24 are stuff labels (filtering them first is about 5% faster) + flattened_ids = np.unique(inst_image[inst_image >= 24]) + + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": inst_image.shape[0], + "width": inst_image.shape[1], + } + + for instance_id in flattened_ids: + # For non-crowd annotations, instance_id // 1000 is the label_id + # Crowd annotations have <1000 instance ids + label_id = instance_id // 1000 if instance_id >= 1000 else instance_id + label = id2label[label_id] + if not label.hasInstances or label.ignoreInEval: + continue + + anno = {} + anno["iscrowd"] = instance_id < 1000 + anno["category_id"] = label.id + + mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") + + inds = np.nonzero(mask) + ymin, ymax = inds[0].min(), inds[0].max() + xmin, xmax = inds[1].min(), inds[1].max() + anno["bbox"] = (xmin, ymin, xmax, ymax) + if xmax <= xmin or ymax <= ymin: + continue + anno["bbox_mode"] = BoxMode.XYXY_ABS + if to_polygons: + # This conversion comes from D4809743 and D5171122, + # when Mask-RCNN was first developed. + contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ + -2 + ] + polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] + # opencv's can produce invalid polygons + if len(polygons) == 0: + continue + anno["segmentation"] = polygons + else: + anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] + annos.append(anno) + ret["annotations"] = annos + return ret + + +if __name__ == "__main__": + """ + Test the cityscapes dataset loader. 
+ + Usage: + python -m detectron2.data.datasets.cityscapes \ + cityscapes/leftImg8bit/train cityscapes/gtFine/train + """ + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("image_dir") + parser.add_argument("gt_dir") + parser.add_argument("--type", choices=["instance", "semantic"], default="instance") + args = parser.parse_args() + from detectron2.data.catalog import Metadata + from detectron2.utils.visualizer import Visualizer + from cityscapesscripts.helpers.labels import labels + + logger = setup_logger(name=__name__) + + dirname = "cityscapes-data-vis" + os.makedirs(dirname, exist_ok=True) + + if args.type == "instance": + dicts = load_cityscapes_instances( + args.image_dir, args.gt_dir, from_json=True, to_polygons=True + ) + logger.info("Done loading {} samples.".format(len(dicts))) + + thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] + meta = Metadata().set(thing_classes=thing_classes) + + else: + dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) + logger.info("Done loading {} samples.".format(len(dicts))) + + stuff_classes = [k.name for k in labels if k.trainId != 255] + stuff_colors = [k.color for k in labels if k.trainId != 255] + meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors) + + for d in dicts: + img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + # cv2.imshow("a", vis.get_image()[:, :, ::-1]) + # cv2.waitKey() + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/src/sts/detectron2/data/datasets/cityscapes_panoptic.py b/src/sts/detectron2/data/datasets/cityscapes_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..48c136f1623261b079591065fec7c7fc38165076 --- /dev/null +++ b/src/sts/detectron2/data/datasets/cityscapes_panoptic.py @@ -0,0 +1,187 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import json +import logging +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets.builtin_meta import CITYSCAPES_CATEGORIES +from detectron2.utils.file_io import PathManager + +""" +This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog. 
+""" + + +logger = logging.getLogger(__name__) + + +def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + image_dict = {} + for city in cities: + city_img_dir = os.path.join(image_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "_leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = os.path.basename(basename)[: -len(suffix)] + + image_dict[basename] = image_file + + for ann in json_info["annotations"]: + image_file = image_dict.get(ann["image_id"], None) + assert image_file is not None, "No image {} found for annotation {}".format( + ann["image_id"], ann["file_name"] + ) + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = ann["segments_info"] + + files.append((image_file, label_file, segments_info)) + + assert len(files), "No images found in {}".format(image_dir) + assert PathManager.isfile(files[0][0]), files[0][0] + assert PathManager.isfile(files[0][1]), files[0][1] + return files + + +def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train". + gt_json (str): path to the json file. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train.json". + meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id" + and "stuff_dataset_id_to_contiguous_id" to map category ids to + contiguous ids for training. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + return segment_info + + assert os.path.exists( + gt_json + ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa + with open(gt_json) as f: + json_info = json.load(f) + files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info) + ret = [] + for image_file, label_file, segments_info in files: + sem_label_file = ( + image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png" + ) + segments_info = [_convert_category_id(x, meta) for x in segments_info] + ret.append( + { + "file_name": image_file, + "image_id": "_".join( + os.path.splitext(os.path.basename(image_file))[0].split("_")[:3] + ), + "sem_seg_file_name": sem_label_file, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" 
+ assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + assert PathManager.isfile( + ret[0]["pan_seg_file_name"] + ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa + return ret + + +_RAW_CITYSCAPES_PANOPTIC_SPLITS = { + "cityscapes_fine_panoptic_train": ( + "cityscapes/leftImg8bit/train", + "cityscapes/gtFine/cityscapes_panoptic_train", + "cityscapes/gtFine/cityscapes_panoptic_train.json", + ), + "cityscapes_fine_panoptic_val": ( + "cityscapes/leftImg8bit/val", + "cityscapes/gtFine/cityscapes_panoptic_val", + "cityscapes/gtFine/cityscapes_panoptic_val.json", + ), + # "cityscapes_fine_panoptic_test": not supported yet +} + + +def register_all_cityscapes_panoptic(root): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # There are three types of ids in cityscapes panoptic segmentation: + # (1) category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the classifier + # (2) instance id: this id is used to differentiate different instances from + # the same category. For "stuff" classes, the instance id is always 0; for + # "thing" classes, the instance id starts from 1 and 0 is reserved for + # ignored instances (e.g. crowd annotation). + # (3) panoptic id: this is the compact id that encode both category and + # instance id by: category_id * 1000 + instance_id. 
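+    # Worked example of (3) with ids from CITYSCAPES_CATEGORIES above: the 3rd "car"
+    # instance (id 26) gets panoptic id 26 * 1000 + 3 = 26003, while a "road" segment
+    # (stuff, id 7) keeps instance id 0 and gets 7 * 1000 + 0 = 7000.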
+ thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for k in CITYSCAPES_CATEGORIES: + if k["isthing"] == 1: + thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + else: + stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items(): + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + gt_json = os.path.join(root, gt_json) + + DatasetCatalog.register( + key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta) + ) + MetadataCatalog.get(key).set( + panoptic_root=gt_dir, + image_root=image_dir, + panoptic_json=gt_json, + gt_dir=gt_dir.replace("cityscapes_panoptic_", ""), + evaluator_type="cityscapes_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **meta, + ) diff --git a/src/sts/detectron2/data/datasets/coco.py b/src/sts/detectron2/data/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..4820f008afefeb4f61285063076b2f1bd7228b38 --- /dev/null +++ b/src/sts/detectron2/data/datasets/coco.py @@ -0,0 +1,532 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import contextlib +import datetime +import io +import json +import logging +import numpy as np +import os +import shutil +import pycocotools.mask as mask_util +from fvcore.common.timer import Timer +from iopath.common.file_io import file_lock +from PIL import Image + +from detectron2.structures import Boxes, BoxMode, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .. import DatasetCatalog, MetadataCatalog + +""" +This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format". +""" + + +logger = logging.getLogger(__name__) + +__all__ = ["load_coco_json", "load_sem_seg", "convert_to_coco_json", "register_coco_instances"] + + +def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): + """ + Load a json file with COCO's instances annotation format. + Currently supports instance detection, instance segmentation, + and person keypoints annotations. + + Args: + json_file (str): full path to the json file in COCO instances annotation format. + image_root (str or path-like): the directory where the images in this json file exists. + dataset_name (str or None): the name of the dataset (e.g., coco_2017_train). + When provided, this function will also do the following: + + * Put "thing_classes" into the metadata associated with this dataset. + * Map the category ids into a contiguous range (needed by standard dataset format), + and add "thing_dataset_id_to_contiguous_id" to the metadata associated + with this dataset. + + This option should usually be provided, unless users need to load + the original json content and apply more processing manually. + extra_annotation_keys (list[str]): list of per-annotation keys that should also be + loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", + "category_id", "segmentation"). The values for these keys will be returned as-is. + For example, the densepose annotations are loaded in this way. + + Returns: + list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See + `Using Custom Datasets `_ ) when `dataset_name` is not None. 
+ If `dataset_name` is None, the returned `category_ids` may be + incontiguous and may not conform to the Detectron2 standard format. + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from pycocotools.coco import COCO + + timer = Timer() + json_file = PathManager.get_local_path(json_file) + with contextlib.redirect_stdout(io.StringIO()): + coco_api = COCO(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + id_map = None + if dataset_name is not None: + meta = MetadataCatalog.get(dataset_name) + cat_ids = sorted(coco_api.getCatIds()) + cats = coco_api.loadCats(cat_ids) + # The categories in a custom json file may not be sorted. + thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] + meta.thing_classes = thing_classes + + # In COCO, certain category ids are artificially removed, + # and by convention they are always ignored. + # We deal with COCO's id issue and translate + # the category ids to contiguous ids in [0, 80). + + # It works by looking at the "categories" field in the json, therefore + # if users' own json also have incontiguous ids, we'll + # apply this mapping as well but print a warning. + if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): + if "coco" not in dataset_name: + logger.warning( + """ +Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. +""" + ) + id_map = {v: i for i, v in enumerate(cat_ids)} + meta.thing_dataset_id_to_contiguous_id = id_map + + # sort indices for reproducible results + img_ids = sorted(coco_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = coco_api.loadImgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... + # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'iscrowd': 0, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] + anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] + total_num_valid_anns = sum([len(x) for x in anns]) + total_num_anns = len(coco_api.anns) + if total_num_valid_anns < total_num_anns: + logger.warning( + f"{json_file} contains {total_num_anns} annotations, but only " + f"{total_num_valid_anns} of them match to images in the file." + ) + + if "minival" not in json_file: + # The popular valminusminival & minival annotations for COCO2014 contain this bug. + # However the ratio of buggy annotations there is tiny and does not affect accuracy. + # Therefore we explicitly white-list them. 
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) + + dataset_dicts = [] + + ann_keys = ["iscrowd", "bbox", "keypoints", "category_id", "rec"] + (extra_annotation_keys or []) + + num_instances_without_valid_segmentation = 0 + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = os.path.join(image_root, img_dict["file_name"]) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + + # The original COCO valminusminival2014 & minival2014 annotation files + # actually contains bugs that, together with certain ways of using COCO API, + # can trigger this assertion. + assert anno["image_id"] == image_id + + assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' + + obj = {key: anno[key] for key in ann_keys if key in anno} + segm = anno.get("segmentation", None) + if segm: # either list[list[float]] or dict(RLE) + if isinstance(segm, dict): + if isinstance(segm["counts"], list): + # convert to compressed RLE + segm = mask_util.frPyObjects(segm, *segm["size"]) + else: + # filter out invalid polygons (< 3 points) + segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + if len(segm) == 0: + num_instances_without_valid_segmentation += 1 + continue # ignore this instance + obj["segmentation"] = segm + keypts = anno.get("keypoints", None) + if keypts: # list[int] + for idx, v in enumerate(keypts): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # Therefore we assume the coordinates are "pixel indices" and + # add 0.5 to convert to floating point coordinates. + keypts[idx] = v + 0.5 + obj["keypoints"] = keypts + + obj["bbox_mode"] = BoxMode.XYWH_ABS + if id_map: + annotation_category_id = obj["category_id"] + try: + obj["category_id"] = id_map[annotation_category_id] + except KeyError as e: + raise KeyError( + f"Encountered category_id={annotation_category_id} " + "but this id does not exist in 'categories' of the json file." + ) from e + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + if num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. ".format( + num_instances_without_valid_segmentation + ) + + "There might be issues in your dataset generation process. " + "A valid polygon should be a list[float] with even length >= 6." + ) + return dataset_dicts + + +def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): + """ + Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are + treated as ground truth annotations and all files under "image_root" with "image_ext" extension + as input images. Ground truth and input images are matched using file paths relative to + "gt_root" and "image_root" respectively without taking into account file extensions. + This works for COCO as well as some other datasets. 
+ + Args: + gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation + annotations are stored as images with integer values in pixels that represent + corresponding semantic labels. + image_root (str): the directory where the input images are. + gt_ext (str): file extension for ground truth annotations. + image_ext (str): file extension for input images. + + Returns: + list[dict]: + a list of dicts in detectron2 standard format without instance-level + annotation. + + Notes: + 1. This function does not read the image and ground truth files. + The results do not have the "image" and "sem_seg" fields. + """ + + # We match input images with ground truth based on their relative filepaths (without file + # extensions) starting from 'image_root' and 'gt_root' respectively. + def file2id(folder_path, file_path): + # extract relative path starting from `folder_path` + image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) + # remove file extension + image_id = os.path.splitext(image_id)[0] + return image_id + + input_files = sorted( + (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), + key=lambda file_path: file2id(image_root, file_path), + ) + gt_files = sorted( + (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), + key=lambda file_path: file2id(gt_root, file_path), + ) + + assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) + + # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images + if len(input_files) != len(gt_files): + logger.warn( + "Directory {} and {} has {} and {} files, respectively.".format( + image_root, gt_root, len(input_files), len(gt_files) + ) + ) + input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] + gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] + intersect = list(set(input_basenames) & set(gt_basenames)) + # sort, otherwise each worker may obtain a list[dict] in different order + intersect = sorted(intersect) + logger.warn("Will use their intersection of {} files.".format(len(intersect))) + input_files = [os.path.join(image_root, f + image_ext) for f in intersect] + gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] + + logger.info( + "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root) + ) + + dataset_dicts = [] + for (img_path, gt_path) in zip(input_files, gt_files): + record = {} + record["file_name"] = img_path + record["sem_seg_file_name"] = gt_path + dataset_dicts.append(record) + + return dataset_dicts + + +def convert_to_coco_dict(dataset_name): + """ + Convert an instance detection/segmentation or keypoint detection dataset + in detectron2's standard format into COCO json format. + + Generic dataset description can be found here: + https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset + + COCO data format description can be found here: + http://cocodataset.org/#format-data + + Args: + dataset_name (str): + name of the source dataset + Must be registered in DatastCatalog and in detectron2's standard format. 
+ Must have corresponding metadata "thing_classes" + Returns: + coco_dict: serializable dict in COCO json format + """ + + dataset_dicts = DatasetCatalog.get(dataset_name) + metadata = MetadataCatalog.get(dataset_name) + + # unmap the category mapping ids for COCO + if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()} + reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa + else: + reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa + + categories = [ + {"id": reverse_id_mapper(id), "name": name} + for id, name in enumerate(metadata.thing_classes) + ] + + logger.info("Converting dataset dicts into COCO format") + coco_images = [] + coco_annotations = [] + + for image_id, image_dict in enumerate(dataset_dicts): + coco_image = { + "id": image_dict.get("image_id", image_id), + "width": int(image_dict["width"]), + "height": int(image_dict["height"]), + "file_name": str(image_dict["file_name"]), + } + coco_images.append(coco_image) + + anns_per_image = image_dict.get("annotations", []) + for annotation in anns_per_image: + # create a new dict with only COCO fields + coco_annotation = {} + + # COCO requirement: XYWH box format for axis-align and XYWHA for rotated + bbox = annotation["bbox"] + if isinstance(bbox, np.ndarray): + if bbox.ndim != 1: + raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.") + bbox = bbox.tolist() + if len(bbox) not in [4, 5]: + raise ValueError(f"bbox has to has length 4 or 5. Got {bbox}.") + from_bbox_mode = annotation["bbox_mode"] + to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS + bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode) + + # COCO requirement: instance area + if "segmentation" in annotation: + # Computing areas for instances by counting the pixels + segmentation = annotation["segmentation"] + # TODO: check segmentation type: RLE, BinaryMask or Polygon + if isinstance(segmentation, list): + polygons = PolygonMasks([segmentation]) + area = polygons.area()[0].item() + elif isinstance(segmentation, dict): # RLE + area = mask_util.area(segmentation).item() + else: + raise TypeError(f"Unknown segmentation type {type(segmentation)}!") + else: + # Computing areas using bounding boxes + if to_bbox_mode == BoxMode.XYWH_ABS: + bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS) + area = Boxes([bbox_xy]).area()[0].item() + else: + area = RotatedBoxes([bbox]).area()[0].item() + + if "keypoints" in annotation: + keypoints = annotation["keypoints"] # list[int] + for idx, v in enumerate(keypoints): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # For COCO format consistency we substract 0.5 + # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 + keypoints[idx] = v - 0.5 + if "num_keypoints" in annotation: + num_keypoints = annotation["num_keypoints"] + else: + num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) + + # COCO requirement: + # linking annotations to images + # "id" field must start with 1 + coco_annotation["id"] = len(coco_annotations) + 1 + coco_annotation["image_id"] = coco_image["id"] + coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] + coco_annotation["area"] = float(area) + coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0)) + coco_annotation["category_id"] = 
int(reverse_id_mapper(annotation["category_id"])) + + # Add optional fields + if "keypoints" in annotation: + coco_annotation["keypoints"] = keypoints + coco_annotation["num_keypoints"] = num_keypoints + + if "segmentation" in annotation: + seg = coco_annotation["segmentation"] = annotation["segmentation"] + if isinstance(seg, dict): # RLE + counts = seg["counts"] + if not isinstance(counts, str): + # make it json-serializable + seg["counts"] = counts.decode("ascii") + + coco_annotations.append(coco_annotation) + + logger.info( + "Conversion finished, " + f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}" + ) + + info = { + "date_created": str(datetime.datetime.now()), + "description": "Automatically generated COCO json file for Detectron2.", + } + coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None} + if len(coco_annotations) > 0: + coco_dict["annotations"] = coco_annotations + return coco_dict + + +def convert_to_coco_json(dataset_name, output_file, allow_cached=True): + """ + Converts dataset into COCO format and saves it to a json file. + dataset_name must be registered in DatasetCatalog and in detectron2's standard format. + + Args: + dataset_name: + reference from the config file to the catalogs + must be registered in DatasetCatalog and in detectron2's standard format + output_file: path of json file that will be saved to + allow_cached: if json file is already present then skip conversion + """ + + # TODO: The dataset or the conversion script *may* change, + # a checksum would be useful for validating the cached data + + PathManager.mkdirs(os.path.dirname(output_file)) + with file_lock(output_file): + if PathManager.exists(output_file) and allow_cached: + logger.warning( + f"Using previously cached COCO format annotations at '{output_file}'. " + "You need to clear the cache file if your dataset has been modified." + ) + else: + logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)") + coco_dict = convert_to_coco_dict(dataset_name) + + logger.info(f"Caching COCO format annotations at '{output_file}' ...") + tmp_file = output_file + ".tmp" + with PathManager.open(tmp_file, "w") as f: + json.dump(coco_dict, f) + shutil.move(tmp_file, output_file) + + +def register_coco_instances(name, metadata, json_file, image_root): + """ + Register a dataset in COCO's json annotation format for + instance detection, instance segmentation and keypoint detection. + (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. + `instances*.json` and `person_keypoints*.json` in the dataset). + + This is an example of how to register a new dataset. + You can do something similar to this function, to register new datasets. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + metadata (dict): extra metadata associated with this dataset. You can + leave it as an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + assert isinstance(name, str), name + assert isinstance(json_file, (str, os.PathLike)), json_file + assert isinstance(image_root, (str, os.PathLike)), image_root + # 1. register a function which returns dicts + DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name)) + + # 2. 
Optionally, add metadata about this dataset, + # since they might be useful in evaluation, visualization or logging + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata + ) + + +if __name__ == "__main__": + """ + Test the COCO json dataset loader. + + Usage: + python -m detectron2.data.datasets.coco \ + path/to/json path/to/image_root dataset_name + + "dataset_name" can be "coco_2014_minival_100", or other + pre-registered ones + """ + from detectron2.utils.logger import setup_logger + from detectron2.utils.visualizer import Visualizer + import detectron2.data.datasets # noqa # add pre-defined metadata + import sys + + logger = setup_logger(name=__name__) + assert sys.argv[3] in DatasetCatalog.list() + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "coco-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/src/sts/detectron2/data/datasets/coco_panoptic.py b/src/sts/detectron2/data/datasets/coco_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..b8dae44317b556610d7fed39017e082d7e855956 --- /dev/null +++ b/src/sts/detectron2/data/datasets/coco_panoptic.py @@ -0,0 +1,228 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager + +from .coco import load_coco_json, load_sem_seg + +__all__ = ["register_coco_panoptic", "register_coco_panoptic_separated"] + + +def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = int(ann["image_id"]) + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. 
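+        # e.g. (illustrative) a panoptic annotation with file_name "000000000139.png"
+        # is paired with the RGB image "<image_dir>/000000000139.jpg" by the join below.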
+ image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + return ret + + +def register_coco_panoptic( + name, metadata, image_root, panoptic_root, panoptic_json, instances_json=None +): + """ + Register a "standard" version of COCO panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + + Args: + name (str): the name that identifies a dataset, + e.g. "coco_2017_train_panoptic" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. + instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, metadata), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="coco_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +def register_coco_panoptic_separated( + name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json +): + """ + Register a "separated" version of COCO panoptic segmentation dataset named `name`. + The annotations in this registered dataset will contain both instance annotations and + semantic annotations, each with its own contiguous ids. Hence it's called "separated". + + It follows the setting used by the PanopticFPN paper: + + 1. The instance annotations directly come from polygons in the COCO + instances annotation task, rather than from the masks in the COCO panoptic annotations. + + The two format have small differences: + Polygons in the instance annotations may have overlaps. + The mask annotations are produced by labeling the overlapped polygons + with depth ordering. + + 2. The semantic annotations are converted from panoptic annotations, where + all "things" are assigned a semantic id of 0. + All semantic categories will therefore have ids in contiguous + range [1, #stuff_categories]. + + This function will also register a pure semantic segmentation dataset + named ``name + '_stuffonly'``. + + Args: + name (str): the name that identifies a dataset, + e.g. "coco_2017_train_panoptic" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images + panoptic_json (str): path to the json panoptic annotation file + sem_seg_root (str): directory which contains all the ground truth segmentation annotations. 
+ instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + "_separated" + DatasetCatalog.register( + panoptic_name, + lambda: merge_to_panoptic( + load_coco_json(instances_json, image_root, panoptic_name), + load_sem_seg(sem_seg_root, image_root), + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + sem_seg_root=sem_seg_root, + json_file=instances_json, # TODO rename + evaluator_type="coco_panoptic_seg", + ignore_label=255, + **metadata, + ) + + semantic_name = name + "_stuffonly" + DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root)) + MetadataCatalog.get(semantic_name).set( + sem_seg_root=sem_seg_root, + image_root=image_root, + evaluator_type="sem_seg", + ignore_label=255, + **metadata, + ) + + +def merge_to_panoptic(detection_dicts, sem_seg_dicts): + """ + Create dataset dicts for panoptic segmentation, by + merging two dicts using "file_name" field to match their entries. + + Args: + detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. + sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. + + Returns: + list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in + both detection_dicts and sem_seg_dicts that correspond to the same image. + The function assumes that the same key in different dicts has the same value. + """ + results = [] + sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} + assert len(sem_seg_file_to_entry) > 0 + + for det_dict in detection_dicts: + dic = copy.copy(det_dict) + dic.update(sem_seg_file_to_entry[dic["file_name"]]) + results.append(dic) + return results + + +if __name__ == "__main__": + """ + Test the COCO panoptic dataset loader. + + Usage: + python -m detectron2.data.datasets.coco_panoptic \ + path/to/image_root path/to/panoptic_root path/to/panoptic_json dataset_name 10 + + "dataset_name" can be "coco_2017_train_panoptic", or other + pre-registered ones + """ + from detectron2.utils.logger import setup_logger + from detectron2.utils.visualizer import Visualizer + import detectron2.data.datasets # noqa # add pre-defined metadata + import sys + from PIL import Image + import numpy as np + + logger = setup_logger(name=__name__) + assert sys.argv[4] in DatasetCatalog.list() + meta = MetadataCatalog.get(sys.argv[4]) + + dicts = load_coco_panoptic_json(sys.argv[3], sys.argv[1], sys.argv[2], meta.as_dict()) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "coco-data-vis" + os.makedirs(dirname, exist_ok=True) + num_imgs_to_vis = int(sys.argv[5]) + for i, d in enumerate(dicts): + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) + if i + 1 >= num_imgs_to_vis: + break diff --git a/src/sts/detectron2/data/datasets/lvis.py b/src/sts/detectron2/data/datasets/lvis.py new file mode 100644 index 0000000000000000000000000000000000000000..e663feb00a69c6763f09e731a828dd35161e6d3a --- /dev/null +++ b/src/sts/detectron2/data/datasets/lvis.py @@ -0,0 +1,228 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
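+# Usage sketch (the dataset name and paths are hypothetical examples):
+#
+#   from detectron2.data import DatasetCatalog
+#   from detectron2.data.datasets.lvis import (
+#       get_lvis_instances_meta,
+#       register_lvis_instances,
+#   )
+#
+#   register_lvis_instances(
+#       "lvis_v0.5_train_custom",                 # hypothetical name
+#       get_lvis_instances_meta("lvis_v0.5"),
+#       "datasets/lvis/lvis_v0.5_train.json",     # hypothetical annotation path
+#       "datasets/coco/",                         # image root; the trailing slash matters,
+#   )                                             # see get_file_name in load_lvis_json below
+#   dicts = DatasetCatalog.get("lvis_v0.5_train_custom")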
+import logging +import os +from fvcore.common.timer import Timer + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from detectron2.utils.file_io import PathManager + +from .builtin_meta import _get_coco_instances_meta +from .lvis_v0_5_categories import LVIS_CATEGORIES as LVIS_V0_5_CATEGORIES +from .lvis_v1_categories import LVIS_CATEGORIES as LVIS_V1_CATEGORIES + +""" +This file contains functions to parse LVIS-format annotations into dicts in the +"Detectron2 format". +""" + +logger = logging.getLogger(__name__) + +__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"] + + +def register_lvis_instances(name, metadata, json_file, image_root): + """ + Register a dataset in LVIS's json annotation format for instance detection and segmentation. + + Args: + name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train". + metadata (dict): extra metadata associated with this dataset. It can be an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name)) + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata + ) + + +def load_lvis_json(json_file, image_root, dataset_name=None): + """ + Load a json file in LVIS's annotation format. + + Args: + json_file (str): full path to the LVIS json annotation file. + image_root (str): the directory where the images in this json file exists. + dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train"). + If provided, this function will put "thing_classes" into the metadata + associated with this dataset. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from lvis import LVIS + + json_file = PathManager.get_local_path(json_file) + + timer = Timer() + lvis_api = LVIS(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + if dataset_name is not None: + meta = get_lvis_instances_meta(dataset_name) + MetadataCatalog.get(dataset_name).set(**meta) + + # sort indices for reproducible results + img_ids = sorted(lvis_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = lvis_api.load_imgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... + # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] 
+ anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids] + + # Sanity check that each annotation has a unique id + ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + + logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file)) + + def get_file_name(img_root, img_dict): + # Determine the path including the split folder ("train2017", "val2017", "test2017") from + # the coco_url field. Example: + # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg' + split_folder, file_name = img_dict["coco_url"].split("/")[-2:] + return os.path.join(img_root + split_folder, file_name) + + dataset_dicts = [] + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = get_file_name(image_root, img_dict) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", []) + record["neg_category_ids"] = img_dict.get("neg_category_ids", []) + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + assert anno["image_id"] == image_id + obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS} + # LVIS data loader can be used to load COCO dataset categories. In this case `meta` + # variable will have a field with COCO-specific category mapping. + if dataset_name is not None and "thing_dataset_id_to_contiguous_id" in meta: + obj["category_id"] = meta["thing_dataset_id_to_contiguous_id"][anno["category_id"]] + else: + obj["category_id"] = anno["category_id"] - 1 # Convert 1-indexed to 0-indexed + segm = anno["segmentation"] # list[list[float]] + # filter out invalid polygons (< 3 points) + valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + assert len(segm) == len( + valid_segm + ), "Annotation contains an invalid polygon with < 3 points" + assert len(segm) > 0 + obj["segmentation"] = segm + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + return dataset_dicts + + +def get_lvis_instances_meta(dataset_name): + """ + Load LVIS metadata. + + Args: + dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5"). 
+ + Returns: + dict: LVIS metadata with keys: thing_classes + """ + if "cocofied" in dataset_name: + return _get_coco_instances_meta() + if "v0.5" in dataset_name: + return _get_lvis_instances_meta_v0_5() + elif "v1" in dataset_name: + return _get_lvis_instances_meta_v1() + raise ValueError("No built-in metadata for dataset {}".format(dataset_name)) + + +def _get_lvis_instances_meta_v0_5(): + assert len(LVIS_V0_5_CATEGORIES) == 1230 + cat_ids = [k["id"] for k in LVIS_V0_5_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V0_5_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes} + return meta + + +def _get_lvis_instances_meta_v1(): + assert len(LVIS_V1_CATEGORIES) == 1203 + cat_ids = [k["id"] for k in LVIS_V1_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V1_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes} + return meta + + +if __name__ == "__main__": + """ + Test the LVIS json dataset loader. + + Usage: + python -m detectron2.data.datasets.lvis \ + path/to/json path/to/image_root dataset_name vis_limit + """ + import sys + import numpy as np + from detectron2.utils.logger import setup_logger + from PIL import Image + import detectron2.data.datasets # noqa # add pre-defined metadata + from detectron2.utils.visualizer import Visualizer + + logger = setup_logger(name=__name__) + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "lvis-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts[: int(sys.argv[4])]: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/src/sts/detectron2/data/datasets/lvis_v0_5_categories.py b/src/sts/detectron2/data/datasets/lvis_v0_5_categories.py new file mode 100644 index 0000000000000000000000000000000000000000..d3dab6198da614937b08682f4c9edf52bdf1d236 --- /dev/null +++ b/src/sts/detectron2/data/datasets/lvis_v0_5_categories.py @@ -0,0 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
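+# Illustrative usage sketch for the LVIS loaders in lvis.py above — a minimal example,
+# shown as a comment block only, with an assumed dataset name and placeholder paths
+# (the optional `lvis` pip package is needed when the dataset is actually loaded):
+#
+#   from detectron2.data import DatasetCatalog
+#   from detectron2.data.datasets.lvis import get_lvis_instances_meta, register_lvis_instances
+#
+#   name = "lvis_v0.5_custom_train"                      # "v0.5" in the name selects the 1230-class metadata
+#   meta = get_lvis_instances_meta(name)
+#   register_lvis_instances(
+#       name, meta,
+#       json_file="datasets/lvis/lvis_v0.5_train.json",  # placeholder annotation file
+#       image_root="datasets/coco/",                     # images resolved via each entry's coco_url
+#   )
+#   dicts = DatasetCatalog.get(name)                     # lazily runs load_lvis_json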
+# Autogen with +# with open("lvis_v0.5_val.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["image_count"] +# del x["instance_count"] +# LVIS_CATEGORIES = repr(c) + " # noqa" + +# fmt: off +LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20, 
'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 
'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a 
bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to 
sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 
'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 
'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 
'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 
'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 
'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 
'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 
'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, 
{'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small 
writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 
'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 
'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 
'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 
'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for 
holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 
'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 
'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 
'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 
'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 
'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 
'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by 
sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 
'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 
'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing 
relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 
'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': 
['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect 
clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant 
commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 
'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa +# fmt: on diff --git a/src/sts/detectron2/data/datasets/lvis_v1_categories.py b/src/sts/detectron2/data/datasets/lvis_v1_categories.py new file mode 100644 index 0000000000000000000000000000000000000000..7374e6968bb006f5d8c49e75d9d3b31ea3d77d05 --- /dev/null +++ b/src/sts/detectron2/data/datasets/lvis_v1_categories.py @@ -0,0 +1,16 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# Autogen with +# with open("lvis_v1_val.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["image_count"] +# del x["instance_count"] +# LVIS_CATEGORIES = repr(c) + " # noqa" +# with open("/tmp/lvis_categories.py", "wt") as f: +# f.write(f"LVIS_CATEGORIES = {LVIS_CATEGORIES}") +# Then paste the contents of that file below + +# fmt: off +LVIS_CATEGORIES = [{'frequency': 'c', 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'id': 1, 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'id': 2, 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'id': 3, 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'f', 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'id': 4, 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'id': 5, 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'c', 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'id': 6, 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'synset': 'almond.n.02', 'synonyms': ['almond'], 'id': 7, 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'id': 8, 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'c', 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'id': 9, 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'id': 10, 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'id': 11, 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'synset': 'apple.n.01', 'synonyms': ['apple'], 'id': 12, 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'id': 13, 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'id': 14, 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'synset': 'apron.n.01', 'synonyms': ['apron'], 'id': 15, 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'id': 16, 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'r', 'synset': 'arctic.n.02', 'synonyms': ['arctic_(type_of_shoe)', 'galosh', 'golosh', 'rubber_(type_of_shoe)', 'gumshoe'], 'id': 17, 'def': 'a waterproof overshoe that protects shoes from water or snow', 'name': 'arctic_(type_of_shoe)'}, {'frequency': 'c', 'synset': 
'armband.n.02', 'synonyms': ['armband'], 'id': 18, 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'id': 19, 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'id': 20, 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'id': 21, 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'id': 22, 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'id': 23, 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'id': 24, 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'id': 25, 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'id': 26, 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'f', 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'id': 27, 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'id': 28, 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'synset': 'awning.n.01', 'synonyms': ['awning'], 'id': 29, 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'id': 30, 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'r', 'synset': 'baboon.n.01', 'synonyms': ['baboon'], 'id': 31, 'def': 'large terrestrial monkeys having doglike muzzles', 'name': 'baboon'}, {'frequency': 'f', 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'id': 32, 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'id': 33, 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'id': 34, 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'id': 35, 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'id': 36, 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'id': 
37, 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'id': 38, 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'id': 39, 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'id': 40, 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'synset': 'ball.n.06', 'synonyms': ['ball'], 'id': 41, 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'id': 42, 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'id': 43, 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'id': 44, 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'synset': 'banana.n.02', 'synonyms': ['banana'], 'id': 45, 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'c', 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'id': 46, 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'id': 47, 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'f', 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'id': 48, 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'id': 49, 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'id': 50, 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'id': 51, 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'synset': 'barge.n.01', 'synonyms': ['barge'], 'id': 52, 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'id': 53, 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'id': 54, 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'id': 55, 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'id': 56, 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'id': 57, 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 
'f', 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'id': 58, 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'id': 59, 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'id': 60, 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'id': 61, 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'id': 62, 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'id': 63, 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'c', 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'id': 64, 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'id': 65, 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'id': 66, 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'id': 67, 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'id': 68, 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'id': 69, 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'synset': 'battery.n.02', 'synonyms': ['battery'], 'id': 70, 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'id': 71, 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'synset': 'bead.n.01', 'synonyms': ['bead'], 'id': 72, 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'c', 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'id': 73, 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'id': 74, 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'id': 75, 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'synset': 'bear.n.01', 'synonyms': ['bear'], 'id': 76, 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'synset': 'bed.n.01', 'synonyms': ['bed'], 'id': 77, 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'r', 'synset': 'bedpan.n.01', 'synonyms': 
['bedpan'], 'id': 78, 'def': 'a shallow vessel used by a bedridden patient for defecation and urination', 'name': 'bedpan'}, {'frequency': 'f', 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'id': 79, 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'synset': 'beef.n.01', 'synonyms': ['cow'], 'id': 80, 'def': 'cattle/cow', 'name': 'cow'}, {'frequency': 'f', 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'id': 81, 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'id': 82, 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'id': 83, 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'id': 84, 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'id': 85, 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'synset': 'bell.n.01', 'synonyms': ['bell'], 'id': 86, 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'id': 87, 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'synset': 'belt.n.02', 'synonyms': ['belt'], 'id': 88, 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'id': 89, 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'synset': 'bench.n.01', 'synonyms': ['bench'], 'id': 90, 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'synset': 'beret.n.01', 'synonyms': ['beret'], 'id': 91, 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'synset': 'bib.n.02', 'synonyms': ['bib'], 'id': 92, 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'id': 93, 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'id': 94, 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'id': 95, 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'f', 'synset': 'billboard.n.01', 'synonyms': ['billboard'], 'id': 96, 'def': 'large outdoor signboard', 'name': 'billboard'}, {'frequency': 'c', 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'id': 97, 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'id': 98, 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'synset': 'bird.n.01', 'synonyms': ['bird'], 'id': 99, 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'c', 'synset': 
'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'id': 100, 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'c', 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'id': 101, 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'id': 102, 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'id': 103, 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'id': 104, 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'id': 105, 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'id': 106, 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'id': 107, 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'synset': 'blackberry.n.01', 'synonyms': ['blackberry'], 'id': 108, 'def': 'large sweet black or very dark purple edible aggregate fruit', 'name': 'blackberry'}, {'frequency': 'f', 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'id': 109, 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'id': 110, 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'id': 111, 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'id': 112, 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'id': 113, 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'f', 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'id': 114, 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'f', 'synset': 'blouse.n.01', 'synonyms': ['blouse'], 'id': 115, 'def': 'a top worn by women', 'name': 'blouse'}, {'frequency': 'f', 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'id': 116, 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'id': 117, 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'id': 118, 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'r', 'synset': 'bob.n.05', 'synonyms': ['bob', 'bobber', 'bobfloat'], 'id': 119, 'def': 'a small float usually made of cork; attached to a fishing line', 'name': 'bob'}, {'frequency': 'c', 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'id': 120, 'def': 'a thing around which thread/tape/film 
or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'c', 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'id': 121, 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'id': 122, 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'id': 123, 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'id': 124, 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'id': 125, 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'id': 126, 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'synset': 'book.n.01', 'synonyms': ['book'], 'id': 127, 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'c', 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'id': 128, 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'id': 129, 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'id': 130, 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'id': 131, 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'synset': 'boot.n.01', 'synonyms': ['boot'], 'id': 132, 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'id': 133, 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'id': 134, 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'id': 135, 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'id': 136, 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'id': 137, 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'id': 138, 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'id': 139, 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'id': 140, 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 
'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'id': 141, 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'id': 142, 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'f', 'synset': 'box.n.01', 'synonyms': ['box'], 'id': 143, 'def': 'a (usually rectangular) container; may have a lid', 'name': 'box'}, {'frequency': 'r', 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'id': 144, 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'id': 145, 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'id': 146, 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'id': 147, 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'id': 148, 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'id': 149, 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'f', 'synset': 'bread.n.01', 'synonyms': ['bread'], 'id': 150, 'def': 'food made from dough of flour or meal and usually raised with yeast or baking powder and then baked', 'name': 'bread'}, {'frequency': 'r', 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'id': 151, 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'f', 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'id': 152, 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'id': 153, 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'f', 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'id': 154, 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'id': 155, 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'synset': 'broom.n.01', 'synonyms': ['broom'], 'id': 156, 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'id': 157, 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'id': 158, 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'id': 159, 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'id': 160, 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, 
{'frequency': 'r', 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'id': 161, 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'synset': 'bull.n.11', 'synonyms': ['horned_cow'], 'id': 162, 'def': 'a cow with horns', 'name': 'bull'}, {'frequency': 'c', 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'id': 163, 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'id': 164, 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'id': 165, 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'id': 166, 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'id': 167, 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'id': 168, 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'f', 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'id': 169, 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'id': 170, 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'id': 171, 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'id': 172, 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'id': 173, 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'id': 174, 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'f', 'synset': 'butter.n.01', 'synonyms': ['butter'], 'id': 175, 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'id': 176, 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'synset': 'button.n.01', 'synonyms': ['button'], 'id': 177, 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'id': 178, 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'id': 179, 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'c', 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 
'id': 180, 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'id': 181, 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'id': 182, 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'synset': 'cake.n.03', 'synonyms': ['cake'], 'id': 183, 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'id': 184, 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'id': 185, 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'synset': 'calf.n.01', 'synonyms': ['calf'], 'id': 186, 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'id': 187, 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'synset': 'camel.n.01', 'synonyms': ['camel'], 'id': 188, 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'synset': 'camera.n.01', 'synonyms': ['camera'], 'id': 189, 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'id': 190, 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'id': 191, 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'id': 192, 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'id': 193, 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'f', 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'id': 194, 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'id': 195, 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'id': 196, 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'id': 197, 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'id': 198, 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'id': 199, 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'c', 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'id': 200, 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 
'name': 'canoe'}, {'frequency': 'c', 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'id': 201, 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'id': 202, 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'f', 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'id': 203, 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'id': 204, 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'c', 'synset': 'cape.n.02', 'synonyms': ['cape'], 'id': 205, 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'id': 206, 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'id': 207, 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'id': 208, 'def': 'a wheeled vehicle adapted to the rails of railroad (mark each individual railcar separately)', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'id': 209, 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'id': 210, 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'id': 211, 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'synset': 'card.n.03', 'synonyms': ['card'], 'id': 212, 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'c', 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'id': 213, 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'id': 214, 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'id': 215, 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'id': 216, 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'id': 217, 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'f', 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'id': 218, 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'synset': 'cart.n.01', 'synonyms': ['cart'], 'id': 219, 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'synset': 'carton.n.02', 'synonyms': ['carton'], 'id': 220, 'def': 'a container made of cardboard for holding food or drink', 'name': 'carton'}, {'frequency': 'c', 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'id': 221, 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'id': 222, 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'id': 223, 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'id': 224, 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'synset': 'cat.n.01', 'synonyms': ['cat'], 'id': 225, 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'f', 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'id': 226, 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'c', 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'id': 227, 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'id': 228, 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'f', 'synset': 'celery.n.01', 'synonyms': ['celery'], 'id': 229, 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'id': 230, 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'id': 231, 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 
'chain_mail'}, {'frequency': 'f', 'synset': 'chair.n.01', 'synonyms': ['chair'], 'id': 232, 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'id': 233, 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'synset': 'chalice.n.01', 'synonyms': ['chalice'], 'id': 234, 'def': 'a bowl-shaped drinking vessel; especially the Eucharistic cup', 'name': 'chalice'}, {'frequency': 'f', 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'id': 235, 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'synset': 'chap.n.04', 'synonyms': ['chap'], 'id': 236, 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'id': 237, 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'id': 238, 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'id': 239, 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'id': 240, 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'c', 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'id': 241, 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'id': 242, 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'c', 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'id': 243, 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'id': 244, 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'id': 245, 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'id': 246, 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'id': 247, 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'id': 248, 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'id': 249, 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'id': 250, 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'id': 251, 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 
'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'id': 252, 'def': 'shirt collar, animal collar, or tight-fitting necklace', 'name': 'choker'}, {'frequency': 'f', 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'id': 253, 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'f', 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'id': 254, 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'id': 255, 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'synset': 'chute.n.02', 'synonyms': ['slide'], 'id': 256, 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'id': 257, 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'id': 258, 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'f', 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'id': 259, 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'id': 260, 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'id': 261, 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'id': 262, 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'c', 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'id': 263, 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'id': 264, 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'synset': 'cleat.n.02', 'synonyms': ['cleat_(for_securing_rope)'], 'id': 265, 'def': 'a fastener (usually with two projecting horns) around which a rope can be secured', 'name': 'cleat_(for_securing_rope)'}, {'frequency': 'r', 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'id': 266, 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'synset': 'clip.n.03', 'synonyms': ['clip'], 'id': 267, 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'id': 268, 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'r', 'synset': 'clipper.n.03', 'synonyms': ['clippers_(for_plants)'], 'id': 269, 'def': 'shears for cutting grass or shrubbery (often used in the plural)', 'name': 'clippers_(for_plants)'}, {'frequency': 'r', 'synset': 'cloak.n.02', 'synonyms': ['cloak'], 'id': 270, 'def': 'a loose outer garment', 'name': 'cloak'}, {'frequency': 'f', 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'id': 271, 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'synset': 
'clock_tower.n.01', 'synonyms': ['clock_tower'], 'id': 272, 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'id': 273, 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'id': 274, 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'id': 275, 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'id': 276, 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'synset': 'coat.n.01', 'synonyms': ['coat'], 'id': 277, 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'id': 278, 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'c', 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'id': 279, 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'id': 280, 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'r', 'synset': 'cockroach.n.01', 'synonyms': ['cockroach'], 'id': 281, 'def': 'any of numerous chiefly nocturnal insects; some are domestic pests', 'name': 'cockroach'}, {'frequency': 'r', 'synset': 'cocoa.n.01', 'synonyms': ['cocoa_(beverage)', 'hot_chocolate_(beverage)', 'drinking_chocolate'], 'id': 282, 'def': 'a beverage made from cocoa powder and milk and sugar; usually drunk hot', 'name': 'cocoa_(beverage)'}, {'frequency': 'c', 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'id': 283, 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'f', 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'id': 284, 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'id': 285, 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'id': 286, 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'synset': 'coil.n.05', 'synonyms': ['coil'], 'id': 287, 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'synset': 'coin.n.01', 'synonyms': ['coin'], 'id': 288, 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'c', 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'id': 289, 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'id': 290, 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'id': 291, 
'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'id': 292, 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'id': 293, 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'id': 294, 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'r', 'synset': 'compass.n.01', 'synonyms': ['compass'], 'id': 295, 'def': 'navigational instrument for finding directions', 'name': 'compass'}, {'frequency': 'f', 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'id': 296, 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'f', 'synset': 'condiment.n.01', 'synonyms': ['condiment'], 'id': 297, 'def': 'a preparation (a sauce or relish or spice) to enhance flavor or enjoyment', 'name': 'condiment'}, {'frequency': 'f', 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'id': 298, 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'id': 299, 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'id': 300, 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'id': 301, 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'r', 'synset': 'cooker.n.01', 'synonyms': ['cooker'], 'id': 302, 'def': 'a utensil for cooking', 'name': 'cooker'}, {'frequency': 'f', 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'id': 303, 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'id': 304, 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'id': 305, 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'f', 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'id': 306, 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'id': 307, 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'c', 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'id': 308, 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'f', 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'id': 309, 'def': 'ears or kernels of corn that can be prepared and served for human food (only mark individual ears or kernels)', 'name': 'edible_corn'}, {'frequency': 'r', 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'id': 310, 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 
'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'id': 311, 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'id': 312, 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'id': 313, 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'c', 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'id': 314, 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'c', 'synset': 'costume.n.04', 'synonyms': ['costume'], 'id': 315, 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'id': 316, 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'id': 317, 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'c', 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'id': 318, 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'id': 319, 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'c', 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'id': 320, 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'r', 'synset': 'crab.n.05', 'synonyms': ['crabmeat'], 'id': 321, 'def': 'the edible flesh of any of various crabs', 'name': 'crabmeat'}, {'frequency': 'c', 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'id': 322, 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'id': 323, 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'synset': 'crate.n.01', 'synonyms': ['crate'], 'id': 324, 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'c', 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'id': 325, 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'id': 326, 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'c', 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'id': 327, 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'id': 328, 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'id': 329, 'def': 'an earthen jar (made of baked clay) or a modern electric crockpot', 'name': 'crock_pot'}, {'frequency': 'f', 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'id': 330, 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'synset': 'crouton.n.01', 'synonyms': 
['crouton'], 'id': 331, 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'c', 'synset': 'crow.n.01', 'synonyms': ['crow'], 'id': 332, 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'r', 'synset': 'crowbar.n.01', 'synonyms': ['crowbar', 'wrecking_bar', 'pry_bar'], 'id': 333, 'def': 'a heavy iron lever with one end forged into a wedge', 'name': 'crowbar'}, {'frequency': 'c', 'synset': 'crown.n.04', 'synonyms': ['crown'], 'id': 334, 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'id': 335, 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'id': 336, 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'id': 337, 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'f', 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'id': 338, 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'c', 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'id': 339, 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'id': 340, 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'c', 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'id': 341, 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'id': 342, 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'id': 343, 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'synset': 'cup.n.01', 'synonyms': ['cup'], 'id': 344, 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'id': 345, 'def': 'a metal award or cup-shaped vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'f', 'synset': 'cupboard.n.01', 'synonyms': ['cupboard', 'closet'], 'id': 346, 'def': 'a small room (or recess) or cabinet used for storage space', 'name': 'cupboard'}, {'frequency': 'f', 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'id': 347, 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'id': 348, 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'id': 349, 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'id': 350, 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, 
{'frequency': 'f', 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'id': 351, 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'id': 352, 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'id': 353, 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'id': 354, 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'synset': 'dalmatian.n.02', 'synonyms': ['dalmatian'], 'id': 355, 'def': 'a large breed having a smooth white coat with black or brown spots', 'name': 'dalmatian'}, {'frequency': 'c', 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'id': 356, 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'id': 357, 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'id': 358, 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'id': 359, 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'id': 360, 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'synset': 'desk.n.01', 'synonyms': ['desk'], 'id': 361, 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'id': 362, 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'id': 363, 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'id': 364, 'def': 'yearly planner book', 'name': 'diary'}, {'frequency': 'r', 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'id': 365, 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'id': 366, 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'id': 367, 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'id': 368, 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'f', 'synset': 'dish.n.01', 'synonyms': ['dish'], 'id': 369, 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'id': 370, 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, 
{'frequency': 'c', 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'id': 371, 'def': 'a cloth for washing dishes or cleaning in general', 'name': 'dishrag'}, {'frequency': 'f', 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'id': 372, 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'id': 373, 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid', 'dishsoap'], 'id': 374, 'def': 'dishsoap or dish detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'f', 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'id': 375, 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'r', 'synset': 'diving_board.n.01', 'synonyms': ['diving_board'], 'id': 376, 'def': 'a springboard from which swimmers can dive', 'name': 'diving_board'}, {'frequency': 'f', 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'id': 377, 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'synset': 'dog.n.01', 'synonyms': ['dog'], 'id': 378, 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'id': 379, 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'f', 'synset': 'doll.n.01', 'synonyms': ['doll'], 'id': 380, 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'id': 381, 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'synset': 'dollhouse.n.01', 'synonyms': ['dollhouse', "doll's_house"], 'id': 382, 'def': "a house so small that it is likened to a child's plaything", 'name': 'dollhouse'}, {'frequency': 'c', 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'id': 383, 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'id': 384, 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'f', 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'id': 385, 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'id': 386, 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'id': 387, 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'synset': 'dove.n.01', 'synonyms': ['dove'], 'id': 388, 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'id': 389, 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'id': 390, 'def': 'a boxlike container in a piece of furniture; made so as to slide in and 
out', 'name': 'drawer'}, {'frequency': 'c', 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'id': 391, 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'id': 392, 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'id': 393, 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'f', 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'id': 394, 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'f', 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'id': 395, 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'synset': 'drill.n.01', 'synonyms': ['drill'], 'id': 396, 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'synset': 'drone.n.04', 'synonyms': ['drone'], 'id': 397, 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'id': 398, 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'id': 399, 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'id': 400, 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'synset': 'duck.n.01', 'synonyms': ['duck'], 'id': 401, 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'c', 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'id': 402, 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'id': 403, 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'id': 404, 'def': 'a large cylindrical bag of heavy cloth (does not include suitcases)', 'name': 'duffel_bag'}, {'frequency': 'r', 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'id': 405, 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'id': 406, 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'id': 407, 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'c', 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'id': 408, 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'id': 409, 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'id': 410, 'def': 'a 
soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'synset': 'earring.n.01', 'synonyms': ['earring'], 'id': 411, 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'synset': 'easel.n.01', 'synonyms': ['easel'], 'id': 412, 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'id': 413, 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'synset': 'eel.n.01', 'synonyms': ['eel'], 'id': 414, 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'id': 415, 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'id': 416, 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'id': 417, 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'id': 418, 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'id': 419, 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'id': 420, 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'id': 421, 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'id': 422, 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'c', 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'id': 423, 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'id': 424, 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'id': 425, 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'id': 426, 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'id': 427, 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'id': 428, 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'synset': 'fan.n.01', 'synonyms': ['fan'], 'id': 429, 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'id': 430, 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'id': 
431, 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'id': 432, 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'id': 433, 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'c', 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'id': 434, 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'id': 435, 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'id': 436, 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'id': 437, 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'id': 438, 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'id': 439, 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'id': 440, 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'f', 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'id': 441, 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'f', 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'id': 442, 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'id': 443, 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'id': 444, 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'id': 445, 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'r', 'synset': 'first-aid_kit.n.01', 'synonyms': ['first-aid_kit'], 'id': 446, 'def': 'kit consisting of a set of bandages and medicines for giving first aid', 'name': 'first-aid_kit'}, {'frequency': 'f', 'synset': 'fish.n.01', 'synonyms': ['fish'], 'id': 447, 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'c', 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'id': 448, 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'id': 449, 'def': 'a 
transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'c', 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'id': 450, 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'synset': 'flag.n.01', 'synonyms': ['flag'], 'id': 451, 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'id': 452, 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'id': 453, 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'id': 454, 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'c', 'synset': 'flap.n.01', 'synonyms': ['flap'], 'id': 455, 'def': 'any broad thin covering attached at one edge, such as a mud flap next to a wheel or a flap on an airplane wing', 'name': 'flap'}, {'frequency': 'r', 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'id': 456, 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'id': 457, 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'id': 458, 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'id': 459, 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'id': 460, 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'id': 461, 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'id': 462, 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'c', 'synset': 'foal.n.01', 'synonyms': ['foal'], 'id': 463, 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'id': 464, 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'id': 465, 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'id': 466, 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'id': 467, 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'id': 468, 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'synset': 
'fork.n.01', 'synonyms': ['fork'], 'id': 469, 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'c', 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'id': 470, 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'c', 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'id': 471, 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'c', 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'id': 472, 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'id': 473, 'def': 'anything that freshens air by removing or covering odor', 'name': 'freshener'}, {'frequency': 'f', 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'id': 474, 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'id': 475, 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'id': 476, 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'f', 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'id': 477, 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'id': 478, 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'id': 479, 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'r', 'synset': 'futon.n.01', 'synonyms': ['futon'], 'id': 480, 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'id': 481, 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'id': 482, 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'id': 483, 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'id': 484, 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'id': 485, 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'id': 486, 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'id': 487, 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'id': 488, 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'c', 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'id': 489, 'def': 'small swift graceful antelope of 
Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'id': 490, 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'id': 491, 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'r', 'synset': 'generator.n.02', 'synonyms': ['generator'], 'id': 492, 'def': 'engine that converts mechanical energy into electrical energy by electromagnetic induction', 'name': 'generator'}, {'frequency': 'c', 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'id': 493, 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'id': 494, 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'id': 495, 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'id': 496, 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'id': 497, 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'id': 498, 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'synset': 'globe.n.03', 'synonyms': ['globe'], 'id': 499, 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'synset': 'glove.n.02', 'synonyms': ['glove'], 'id': 500, 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'synset': 'goat.n.01', 'synonyms': ['goat'], 'id': 501, 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'id': 502, 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'id': 503, 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'c', 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'id': 504, 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'id': 505, 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'id': 506, 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'synset': 'goose.n.01', 'synonyms': ['goose'], 'id': 507, 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'id': 508, 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 
'id': 509, 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'f', 'synset': 'grape.n.01', 'synonyms': ['grape'], 'id': 510, 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'c', 'synset': 'grater.n.01', 'synonyms': ['grater'], 'id': 511, 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'id': 512, 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'id': 513, 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'f', 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'id': 514, 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'f', 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'id': 515, 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'id': 516, 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'f', 'synset': 'grill.n.02', 'synonyms': ['grill', 'grille', 'grillwork', 'radiator_grille'], 'id': 517, 'def': 'a framework of metal bars used as a partition or a grate', 'name': 'grill'}, {'frequency': 'r', 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'id': 518, 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'id': 519, 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'id': 520, 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'f', 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'id': 521, 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'id': 522, 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'synset': 'gun.n.01', 'synonyms': ['gun'], 'id': 523, 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'f', 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'id': 524, 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'id': 525, 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'id': 526, 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'r', 'synset': 'halter.n.03', 'synonyms': ['halter_top'], 'id': 527, 'def': "a woman's top that fastens behind the back and neck leaving the back and arms uncovered", 'name': 'halter_top'}, {'frequency': 'f', 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'id': 528, 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 
'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'id': 529, 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'id': 530, 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'c', 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'id': 531, 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'id': 532, 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'c', 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'id': 533, 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'f', 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'id': 534, 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'id': 535, 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'id': 536, 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'id': 537, 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'id': 538, 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'id': 539, 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'id': 540, 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'id': 541, 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'id': 542, 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'id': 543, 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'synset': 'hat.n.01', 'synonyms': ['hat'], 'id': 544, 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'id': 545, 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'c', 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'id': 546, 'def': 'a garment that covers the head OR face', 'name': 'veil'}, {'frequency': 'f', 'synset': 'headband.n.01', 'synonyms': ['headband'], 'id': 547, 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'id': 548, 'def': 'a 
vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'id': 549, 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'id': 550, 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'synset': 'headset.n.01', 'synonyms': ['headset'], 'id': 551, 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'id': 552, 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'c', 'synset': 'heart.n.02', 'synonyms': ['heart'], 'id': 553, 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'id': 554, 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'id': 555, 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'id': 556, 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'synset': 'heron.n.02', 'synonyms': ['heron'], 'id': 557, 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'id': 558, 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'id': 559, 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'id': 560, 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'id': 561, 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'id': 562, 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'id': 563, 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'synset': 'honey.n.01', 'synonyms': ['honey'], 'id': 564, 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'id': 565, 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'synset': 'hook.n.05', 'synonyms': ['hook'], 'id': 566, 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'r', 'synset': 'hookah.n.01', 'synonyms': ['hookah', 'narghile', 'nargileh', 'sheesha', 'shisha', 'water_pipe'], 'id': 567, 
'def': 'a tobacco pipe with a long flexible tube connected to a container where the smoke is cooled by passing through water', 'name': 'hookah'}, {'frequency': 'r', 'synset': 'hornet.n.01', 'synonyms': ['hornet'], 'id': 568, 'def': 'large stinging wasp', 'name': 'hornet'}, {'frequency': 'f', 'synset': 'horse.n.01', 'synonyms': ['horse'], 'id': 569, 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'id': 570, 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'id': 571, 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'id': 572, 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'id': 573, 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'id': 574, 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'id': 575, 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'c', 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'id': 576, 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'id': 577, 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'f', 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'id': 578, 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'id': 579, 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'id': 580, 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'id': 581, 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'id': 582, 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'id': 583, 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'c', 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'id': 584, 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'id': 585, 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'f', 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'id': 586, 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 
'smoothing_iron_(for_clothing)'], 'id': 587, 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'c', 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'id': 588, 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'id': 589, 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'c', 'synset': 'jam.n.01', 'synonyms': ['jam'], 'id': 590, 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'synset': 'jar.n.01', 'synonyms': ['jar'], 'id': 591, 'def': 'a vessel (usually cylindrical) with a wide mouth and without handles', 'name': 'jar'}, {'frequency': 'f', 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'id': 592, 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'id': 593, 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'id': 594, 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'id': 595, 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'id': 596, 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'r', 'synset': 'jewel.n.01', 'synonyms': ['jewel', 'gem', 'precious_stone'], 'id': 597, 'def': 'a precious or semiprecious stone incorporated into a piece of jewelry', 'name': 'jewel'}, {'frequency': 'c', 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'id': 598, 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'id': 599, 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'c', 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'id': 600, 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'id': 601, 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'synset': 'keg.n.02', 'synonyms': ['keg'], 'id': 602, 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'id': 603, 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'id': 604, 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'synset': 'key.n.01', 'synonyms': ['key'], 'id': 605, 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'id': 606, 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'c', 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'id': 607, 'def': 'a knee-length pleated tartan 
skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'id': 608, 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'id': 609, 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'r', 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'id': 610, 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'synset': 'kite.n.03', 'synonyms': ['kite'], 'id': 611, 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'id': 612, 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'id': 613, 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'id': 614, 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'synset': 'knife.n.01', 'synonyms': ['knife'], 'id': 615, 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'id': 616, 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'synset': 'knob.n.02', 'synonyms': ['knob'], 'id': 617, 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'id': 618, 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'id': 619, 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'id': 620, 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'id': 621, 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'id': 622, 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'c', 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'id': 623, 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'f', 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'id': 624, 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'id': 625, 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'id': 626, 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'id': 627, 'def': 'a metal post supporting 
an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'id': 628, 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'id': 629, 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'id': 630, 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'id': 631, 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'id': 632, 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'f', 'synset': 'latch.n.02', 'synonyms': ['latch'], 'id': 633, 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'id': 634, 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'synset': 'leather.n.01', 'synonyms': ['leather'], 'id': 635, 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'id': 636, 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'id': 637, 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'r', 'synset': 'legume.n.02', 'synonyms': ['legume'], 'id': 638, 'def': 'the fruit or seed of bean or pea plants', 'name': 'legume'}, {'frequency': 'f', 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'id': 639, 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'id': 640, 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'id': 641, 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'id': 642, 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'id': 643, 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'id': 644, 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'id': 645, 'def': 'lightblub/source of light', 'name': 'lightbulb'}, {'frequency': 'r', 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'id': 646, 'def': 'a metallic conductor that is attached to a 
high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'f', 'synset': 'lime.n.06', 'synonyms': ['lime'], 'id': 647, 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'id': 648, 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'c', 'synset': 'lion.n.01', 'synonyms': ['lion'], 'id': 649, 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'id': 650, 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'r', 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'id': 651, 'def': 'liquor or beer', 'name': 'liquor'}, {'frequency': 'c', 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'id': 652, 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'f', 'synset': 'log.n.01', 'synonyms': ['log'], 'id': 653, 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'id': 654, 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'f', 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'id': 655, 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'id': 656, 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'id': 657, 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'id': 658, 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'id': 659, 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'c', 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'id': 660, 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'f', 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'id': 661, 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'synset': 'mallard.n.01', 'synonyms': ['mallard'], 'id': 662, 'def': 'wild dabbling duck from which domestic ducks are descended', 'name': 'mallard'}, {'frequency': 'r', 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'id': 663, 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'id': 664, 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'r', 'synset': 'manatee.n.01', 'synonyms': ['manatee'], 'id': 665, 'def': 'sirenian mammal of tropical coastal waters of America', 'name': 'manatee'}, {'frequency': 'c', 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'id': 666, 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'id': 667, 'def': 'a container (usually in a barn or stable) 
from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'id': 668, 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'f', 'synset': 'map.n.01', 'synonyms': ['map'], 'id': 669, 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'f', 'synset': 'marker.n.03', 'synonyms': ['marker'], 'id': 670, 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'synset': 'martini.n.01', 'synonyms': ['martini'], 'id': 671, 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'id': 672, 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'id': 673, 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'synset': 'masher.n.02', 'synonyms': ['masher'], 'id': 674, 'def': 'a kitchen utensil used for mashing (e.g. potatoes)', 'name': 'masher'}, {'frequency': 'f', 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'id': 675, 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'synset': 'mast.n.01', 'synonyms': ['mast'], 'id': 676, 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'id': 677, 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'id': 678, 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'id': 679, 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'id': 680, 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'id': 681, 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'id': 682, 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'id': 683, 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'c', 'synset': 'melon.n.01', 'synonyms': ['melon'], 'id': 684, 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'id': 685, 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'id': 686, 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'id': 687, 'def': 'kitchen appliance that 
cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'id': 688, 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'f', 'synset': 'milk.n.01', 'synonyms': ['milk'], 'id': 689, 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'r', 'synset': 'milk_can.n.01', 'synonyms': ['milk_can'], 'id': 690, 'def': 'can for transporting milk', 'name': 'milk_can'}, {'frequency': 'r', 'synset': 'milkshake.n.01', 'synonyms': ['milkshake'], 'id': 691, 'def': 'frothy drink of milk and flavoring and sometimes fruit or ice cream', 'name': 'milkshake'}, {'frequency': 'f', 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'id': 692, 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'id': 693, 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'id': 694, 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'id': 695, 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'id': 696, 'def': 'a kitchen utensil that is used for mixing foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'synset': 'money.n.03', 'synonyms': ['money'], 'id': 697, 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'id': 698, 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'id': 699, 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'synset': 'motor.n.01', 'synonyms': ['motor'], 'id': 700, 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'id': 701, 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'id': 702, 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'f', 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'id': 703, 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'id': 704, 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'f', 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'id': 705, 'def': 'a computer input device that controls an on-screen pointer (does not include trackpads / touchpads)', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'id': 706, 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 
'mousepad'}, {'frequency': 'c', 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'id': 707, 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'synset': 'mug.n.04', 'synonyms': ['mug'], 'id': 708, 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'id': 709, 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'id': 710, 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'c', 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'id': 711, 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'id': 712, 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'f', 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'id': 713, 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'id': 714, 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'id': 715, 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'id': 716, 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'c', 'synset': 'needle.n.03', 'synonyms': ['needle'], 'id': 717, 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'synset': 'nest.n.01', 'synonyms': ['nest'], 'id': 718, 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'f', 'synset': 'newspaper.n.01', 'synonyms': ['newspaper', 'paper_(newspaper)'], 'id': 719, 'def': 'a daily or weekly publication on folded sheets containing news, articles, and advertisements', 'name': 'newspaper'}, {'frequency': 'c', 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'id': 720, 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'id': 721, 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'id': 722, 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'c', 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'id': 723, 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'id': 724, 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'synset': 'notepad.n.01', 'synonyms': 
['notepad'], 'id': 725, 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'f', 'synset': 'nut.n.03', 'synonyms': ['nut'], 'id': 726, 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'id': 727, 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'f', 'synset': 'oar.n.01', 'synonyms': ['oar'], 'id': 728, 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'id': 729, 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'id': 730, 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'id': 731, 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'id': 732, 'def': 'oil from olives', 'name': 'olive_oil'}, {'frequency': 'r', 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'id': 733, 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'synset': 'onion.n.01', 'synonyms': ['onion'], 'id': 734, 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'id': 735, 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'id': 736, 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'c', 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'id': 737, 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'f', 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'id': 738, 'def': 'a thick standalone cushion used as a seat or footrest, often next to a chair', 'name': 'ottoman'}, {'frequency': 'f', 'synset': 'oven.n.01', 'synonyms': ['oven'], 'id': 739, 'def': 'kitchen appliance used for baking or roasting', 'name': 'oven'}, {'frequency': 'c', 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'id': 740, 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'synset': 'owl.n.01', 'synonyms': ['owl'], 'id': 741, 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'synset': 'packet.n.03', 'synonyms': ['packet'], 'id': 742, 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'id': 743, 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'synset': 'pad.n.04', 'synonyms': ['pad'], 'id': 744, 'def': 'mostly arm/knee pads labeled', 'name': 'pad'}, {'frequency': 'f', 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'id': 745, 'def': 'a short light oar used without an oarlock to propel a canoe or small 
boat', 'name': 'paddle'}, {'frequency': 'c', 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'id': 746, 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'c', 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'id': 747, 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'synset': 'painting.n.01', 'synonyms': ['painting'], 'id': 748, 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'f', 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'id': 749, 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'id': 750, 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'id': 751, 'def': 'cooking utensil consisting of a wide metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'id': 752, 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'id': 753, 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'id': 754, 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'id': 755, 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'f', 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'id': 756, 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'id': 757, 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'id': 758, 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'id': 759, 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'id': 760, 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'c', 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'id': 761, 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'id': 762, 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'c', 'synset': 'parasol.n.01', 'synonyms': ['parasol', 'sunshade'], 'id': 763, 'def': 'a handheld collapsible source of shade', 'name': 'parasol'}, {'frequency': 'r', 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'id': 764, 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'c', 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'id': 765, 
'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'id': 766, 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'id': 767, 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'id': 768, 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'id': 769, 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'c', 'synset': 'passport.n.02', 'synonyms': ['passport'], 'id': 770, 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home country', 'name': 'passport'}, {'frequency': 'f', 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'id': 771, 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'id': 772, 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'id': 773, 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'synset': 'peach.n.03', 'synonyms': ['peach'], 'id': 774, 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'id': 775, 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'f', 'synset': 'pear.n.01', 'synonyms': ['pear'], 'id': 776, 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'c', 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'id': 777, 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'synset': 'peg.n.04', 'synonyms': ['wooden_leg', 'pegleg'], 'id': 778, 'def': 'a prosthesis that replaces a missing leg', 'name': 'wooden_leg'}, {'frequency': 'r', 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'id': 779, 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'id': 780, 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'synset': 'pen.n.01', 'synonyms': ['pen'], 'id': 781, 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'f', 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'id': 782, 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'id': 783, 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'id': 784, 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'synset': 'pendulum.n.01', 'synonyms': 
['pendulum'], 'id': 785, 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'id': 786, 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'id': 787, 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'id': 788, 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'f', 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'id': 789, 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'id': 790, 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'id': 791, 'def': 'a toiletry that emits and diffuses a fragrant odor', 'name': 'perfume'}, {'frequency': 'r', 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'id': 792, 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'synset': 'person.n.01', 'synonyms': ['person', 'baby', 'child', 'boy', 'girl', 'man', 'woman', 'human'], 'id': 793, 'def': 'a human being', 'name': 'person'}, {'frequency': 'c', 'synset': 'pet.n.01', 'synonyms': ['pet'], 'id': 794, 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'c', 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'id': 795, 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'id': 796, 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'id': 797, 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'f', 'synset': 'piano.n.01', 'synonyms': ['piano'], 'id': 798, 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'id': 799, 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'id': 800, 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'synset': 'pie.n.01', 'synonyms': ['pie'], 'id': 801, 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'id': 802, 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'id': 803, 'def': "a child's coin bank (often shaped like a pig)", 'name': 
'piggy_bank'}, {'frequency': 'f', 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'id': 804, 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'id': 805, 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'id': 806, 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'id': 807, 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'id': 808, 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'id': 809, 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'id': 810, 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'id': 811, 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'id': 812, 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'c', 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'id': 813, 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'id': 814, 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'id': 815, 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'id': 816, 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. 
tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'id': 817, 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'synset': 'plate.n.04', 'synonyms': ['plate'], 'id': 818, 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'synset': 'platter.n.01', 'synonyms': ['platter'], 'id': 819, 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'id': 820, 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'id': 821, 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'id': 822, 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'synset': 'plume.n.02', 'synonyms': ['plume'], 'id': 823, 'def': 'a feather or cluster of feathers worn as an ornament', 'name': 'plume'}, {'frequency': 'r', 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'id': 824, 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'id': 825, 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'id': 826, 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'id': 827, 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'f', 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'id': 828, 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'id': 829, 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'synset': 'pony.n.05', 'synonyms': ['pony'], 'id': 830, 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'id': 831, 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'id': 832, 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'c', 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'id': 833, 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'id': 834, 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'id': 
835, 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'synset': 'pot.n.01', 'synonyms': ['pot'], 'id': 836, 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'id': 837, 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'synset': 'potato.n.01', 'synonyms': ['potato'], 'id': 838, 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'id': 839, 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'id': 840, 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'id': 841, 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'c', 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'id': 842, 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'id': 843, 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'c', 'synset': 'pretzel.n.01', 'synonyms': ['pretzel'], 'id': 844, 'def': 'glazed and salted cracker typically in the shape of a loose knot', 'name': 'pretzel'}, {'frequency': 'f', 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'id': 845, 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'id': 846, 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 'projectile_(weapon)'}, {'frequency': 'c', 'synset': 'projector.n.02', 'synonyms': ['projector'], 'id': 847, 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'id': 848, 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'synset': 'prune.n.01', 'synonyms': ['prune'], 'id': 849, 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'id': 850, 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'id': 851, 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'id': 852, 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'id': 853, 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'id': 854, 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'id': 855, 'def': 'a tool for 
making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'id': 856, 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'c', 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'id': 857, 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'id': 858, 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'id': 859, 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'id': 860, 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'id': 861, 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'id': 862, 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'id': 863, 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'synset': 'radar.n.01', 'synonyms': ['radar'], 'id': 864, 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'f', 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'id': 865, 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, {'frequency': 'c', 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'id': 866, 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'id': 867, 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'synset': 'raft.n.01', 'synonyms': ['raft'], 'id': 868, 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'id': 869, 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'id': 870, 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'id': 871, 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'id': 872, 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'synset': 'rat.n.01', 'synonyms': ['rat'], 'id': 873, 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'id': 874, 'def': 'a blade that has very sharp edge', 'name': 
'razorblade'}, {'frequency': 'c', 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'id': 875, 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'id': 876, 'def': 'vehicle mirror (side or rearview)', 'name': 'rearview_mirror'}, {'frequency': 'c', 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'id': 877, 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'id': 878, 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'c', 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'id': 879, 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'f', 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'id': 880, 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'id': 881, 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'id': 882, 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'id': 883, 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'c', 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'id': 884, 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'synset': 'ring.n.08', 'synonyms': ['ring'], 'id': 885, 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'id': 886, 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'id': 887, 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'synset': 'robe.n.01', 'synonyms': ['robe'], 'id': 888, 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'id': 889, 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'synset': 'rodent.n.01', 'synonyms': ['rodent'], 'id': 890, 'def': 'relatively small placental mammals having a single pair of constantly growing incisor teeth specialized for gnawing', 'name': 'rodent'}, {'frequency': 'r', 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'id': 891, 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'id': 892, 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'id': 893, 'def': 
'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'id': 894, 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'id': 895, 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'id': 896, 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'id': 897, 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'id': 898, 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'id': 899, 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'id': 900, 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'id': 901, 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'id': 902, 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'f', 'synset': 'sail.n.01', 'synonyms': ['sail'], 'id': 903, 'def': 'a large piece of fabric by means of which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'f', 'synset': 'salad.n.01', 'synonyms': ['salad'], 'id': 904, 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'id': 905, 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'c', 'synset': 'salami.n.01', 'synonyms': ['salami'], 'id': 906, 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'c', 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'id': 907, 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'id': 908, 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'c', 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'id': 909, 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'id': 910, 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'id': 911, 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, 
{'frequency': 'f', 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'id': 912, 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'id': 913, 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'id': 914, 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'id': 915, 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'id': 916, 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'id': 917, 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'id': 918, 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'id': 919, 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'id': 920, 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'id': 921, 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'id': 922, 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, {'frequency': 'f', 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'id': 923, 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'f', 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'id': 924, 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'r', 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'id': 925, 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'c', 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'id': 926, 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'f', 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'id': 927, 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'id': 928, 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'c', 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'id': 929, 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'c', 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'id': 930, 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'id': 931, 'def': 
'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'id': 932, 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'c', 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'id': 933, 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'c', 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'id': 934, 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'id': 935, 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'c', 'synset': 'shark.n.01', 'synonyms': ['shark'], 'id': 936, 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'id': 937, 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'id': 938, 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'synset': 'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'id': 939, 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'id': 940, 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'id': 941, 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'synset': 'shears.n.01', 'synonyms': ['shears'], 'id': 942, 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'id': 943, 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'id': 944, 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'id': 945, 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'c', 'synset': 'shield.n.02', 'synonyms': ['shield'], 'id': 946, 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'id': 947, 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'id': 948, 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'f', 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'id': 949, 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'id': 950, 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'synset': 'short_pants.n.01', 'synonyms': 
['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'id': 951, 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'id': 952, 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'f', 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'id': 953, 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'id': 954, 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'id': 955, 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'r', 'synset': 'shower_cap.n.01', 'synonyms': ['shower_cap'], 'id': 956, 'def': 'a tight cap worn to keep hair dry while showering', 'name': 'shower_cap'}, {'frequency': 'f', 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'id': 957, 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'id': 958, 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'f', 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'id': 959, 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'synset': 'silo.n.01', 'synonyms': ['silo'], 'id': 960, 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'synset': 'sink.n.01', 'synonyms': ['sink'], 'id': 961, 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'id': 962, 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'id': 963, 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'synset': 'ski.n.01', 'synonyms': ['ski'], 'id': 964, 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'id': 965, 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'id': 966, 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'id': 967, 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'id': 968, 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'r', 'synset': 'skullcap.n.01', 'synonyms': ['skullcap'], 'id': 969, 'def': 'rounded brimless cap fitting the crown of the head', 'name': 'skullcap'}, {'frequency': 'c', 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'id': 970, 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'synset': 'sleeping_bag.n.01', 'synonyms': 
['sleeping_bag'], 'id': 971, 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'id': 972, 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'id': 973, 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'id': 974, 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'id': 975, 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'id': 976, 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'id': 977, 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'id': 978, 'def': 'tracked vehicle for travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'synset': 'soap.n.01', 'synonyms': ['soap'], 'id': 979, 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'id': 980, 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'synset': 'sock.n.01', 'synonyms': ['sock'], 'id': 981, 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'f', 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'id': 982, 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'synset': 'softball.n.01', 'synonyms': ['softball'], 'id': 983, 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'id': 984, 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'id': 985, 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'f', 'synset': 'soup.n.01', 'synonyms': ['soup'], 'id': 986, 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'id': 987, 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'id': 988, 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'id': 989, 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 
'soybean_milk', 'soymilk'], 'id': 990, 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'id': 991, 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'id': 992, 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'id': 993, 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'id': 994, 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 'eyeglasses', 'glasses'], 'id': 995, 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'id': 996, 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'c', 'synset': 'spider.n.01', 'synonyms': ['spider'], 'id': 997, 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'r', 'synset': 'spiny_lobster.n.02', 'synonyms': ['crawfish', 'crayfish'], 'id': 998, 'def': 'large edible marine crustacean having a spiny carapace but lacking the large pincers of true lobsters', 'name': 'crawfish'}, {'frequency': 'c', 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'id': 999, 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'id': 1000, 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'id': 1001, 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'id': 1002, 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'synset': 'squid.n.01', 'synonyms': ['squid_(food)', 'calamari', 'calamary'], 'id': 1003, 'def': '(Italian cuisine) squid prepared as food', 'name': 'squid_(food)'}, {'frequency': 'c', 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'id': 1004, 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'r', 'synset': 'stagecoach.n.01', 'synonyms': ['stagecoach'], 'id': 1005, 'def': 'a large coach-and-four formerly used to carry passengers and mail on regular routes between towns', 'name': 'stagecoach'}, {'frequency': 'c', 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'id': 1006, 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'c', 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'id': 1007, 'def': 'echinoderms characterized 
by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'id': 1008, 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'id': 1009, 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'id': 1010, 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'f', 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'id': 1011, 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'id': 1012, 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'id': 1013, 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'id': 1014, 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'synset': 'stew.n.02', 'synonyms': ['stew'], 'id': 1015, 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'id': 1016, 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'id': 1017, 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'f', 'synset': 'stool.n.01', 'synonyms': ['stool'], 'id': 1018, 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'id': 1019, 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'id': 1020, 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'id': 1021, 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'id': 1022, 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'synset': 'strap.n.01', 'synonyms': ['strap'], 'id': 1023, 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'id': 1024, 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'id': 1025, 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'id': 1026, 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'id': 1027, 'def': 'a lamp 
supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'id': 1028, 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'id': 1029, 'def': 'a pointed tool for writing or drawing or engraving, including pens', 'name': 'stylus'}, {'frequency': 'r', 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'id': 1030, 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'id': 1031, 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'id': 1032, 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'f', 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'id': 1033, 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'id': 1034, 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'id': 1035, 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'id': 1036, 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'f', 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'id': 1037, 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'id': 1038, 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'synset': 'swab.n.02', 'synonyms': ['mop'], 'id': 1039, 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'id': 1040, 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'id': 1041, 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'id': 1042, 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'id': 1043, 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'id': 1044, 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'id': 1045, 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 
'c', 'synset': 'sword.n.01', 'synonyms': ['sword'], 'id': 1046, 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'id': 1047, 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'id': 1048, 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'id': 1049, 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'synset': 'table.n.02', 'synonyms': ['table'], 'id': 1050, 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'id': 1051, 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'id': 1052, 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'id': 1053, 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'synset': 'taco.n.02', 'synonyms': ['taco'], 'id': 1054, 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'synset': 'tag.n.02', 'synonyms': ['tag'], 'id': 1055, 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'id': 1056, 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'id': 1057, 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'id': 1058, 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'f', 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'id': 1059, 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'id': 1060, 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'f', 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'id': 1061, 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'id': 1062, 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'id': 1063, 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 
'id': 1064, 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'id': 1065, 'def': 'a cloth having a crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'id': 1066, 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'c', 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'id': 1067, 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'id': 1068, 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'id': 1069, 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'f', 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'id': 1070, 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'id': 1071, 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'id': 1072, 'def': 'electronic device for communicating by voice over long distances (includes wired and wireless/cell phones)', 'name': 'telephone'}, {'frequency': 'c', 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'id': 1073, 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'id': 1074, 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'id': 1075, 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'id': 1076, 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'id': 1077, 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'id': 1078, 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'id': 1079, 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'id': 1080, 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'id': 1081, 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'id': 1082, 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'f', 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'id': 1083, 'def': 'a regulator for automatically regulating 
temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'id': 1084, 'def': 'a small metal cap to protect the finger while sewing; can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'id': 1085, 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'id': 1086, 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'id': 1087, 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'id': 1088, 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'id': 1089, 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'id': 1090, 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'id': 1091, 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'c', 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'id': 1092, 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'id': 1093, 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'id': 1094, 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'id': 1095, 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'f', 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'id': 1096, 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'id': 1097, 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'id': 1098, 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'id': 1099, 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'f', 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'id': 1100, 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'id': 1101, 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'id': 1102, 'def': 'small brush; has 
long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'id': 1103, 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, {'frequency': 'f', 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'id': 1104, 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'f', 'synset': 'top.n.09', 'synonyms': ['cover'], 'id': 1105, 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'id': 1106, 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'id': 1107, 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'synset': 'towel.n.01', 'synonyms': ['towel'], 'id': 1108, 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'id': 1109, 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'synset': 'toy.n.03', 'synonyms': ['toy'], 'id': 1110, 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'id': 1111, 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'id': 1112, 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'c', 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'id': 1113, 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'f', 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'id': 1114, 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'id': 1115, 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'id': 1116, 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'synset': 'tray.n.01', 'synonyms': ['tray'], 'id': 1117, 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'id': 1118, 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'id': 1119, 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'c', 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'id': 
1120, 'def': 'a vehicle with three wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'f', 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'id': 1121, 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'id': 1122, 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'synset': 'truck.n.01', 'synonyms': ['truck'], 'id': 1123, 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'id': 1124, 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'id': 1125, 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'synset': 'tub.n.02', 'synonyms': ['vat'], 'id': 1126, 'def': 'a large vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'synset': 'turban.n.01', 'synonyms': ['turban'], 'id': 1127, 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'c', 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'id': 1128, 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'id': 1129, 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'id': 1130, 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'c', 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'id': 1131, 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'c', 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'id': 1132, 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'id': 1133, 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'f', 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'id': 1134, 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'id': 1135, 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'f', 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'id': 1136, 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'c', 'synset': 'urn.n.01', 'synonyms': ['urn'], 'id': 1137, 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'id': 1138, 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'f', 'synset': 'vase.n.01', 'synonyms': ['vase'], 'id': 1139, 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'synset': 
'vending_machine.n.01', 'synonyms': ['vending_machine'], 'id': 1140, 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'id': 1141, 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'f', 'synset': 'vest.n.01', 'synonyms': ['vest', 'waistcoat'], 'id': 1142, 'def': "a man's sleeveless garment worn underneath a coat", 'name': 'vest'}, {'frequency': 'c', 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'id': 1143, 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'id': 1144, 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'id': 1145, 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'id': 1146, 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'c', 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'id': 1147, 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'id': 1148, 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'id': 1149, 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'id': 1150, 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'id': 1151, 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'id': 1152, 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'id': 1153, 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'id': 1154, 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'id': 1155, 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'f', 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'id': 1156, 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'id': 1157, 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'id': 1158, 'def': 'a tall piece of furniture that provides storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'synset': 'washbasin.n.01', 'synonyms': ['washbasin', 
'basin_(for_washing)', 'washbowl', 'washstand', 'handbasin'], 'id': 1159, 'def': 'a bathroom sink that is permanently installed and connected to a water supply and drainpipe; where you can wash your hands and face', 'name': 'washbasin'}, {'frequency': 'c', 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'id': 1160, 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'id': 1161, 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'id': 1162, 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'id': 1163, 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'id': 1164, 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'id': 1165, 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'c', 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'id': 1166, 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'id': 1167, 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'id': 1168, 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'id': 1169, 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'id': 1170, 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'id': 1171, 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'f', 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'id': 1172, 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'id': 1173, 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'id': 1174, 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'id': 1175, 'def': 'a rich cake with two or more tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'id': 1176, 'def': 'a ring given to the bride and/or groom at 
the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'id': 1177, 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'id': 1178, 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'id': 1179, 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'id': 1180, 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'c', 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'id': 1181, 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'c', 'synset': 'wig.n.01', 'synonyms': ['wig'], 'id': 1182, 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'id': 1183, 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'id': 1184, 'def': 'A mill or turbine that is powered by wind', 'name': 'windmill'}, {'frequency': 'c', 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'id': 1185, 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'id': 1186, 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'id': 1187, 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'id': 1188, 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'c', 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'id': 1189, 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'id': 1190, 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'f', 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 'id': 1191, 'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'synset': 'wok.n.01', 'synonyms': ['wok'], 'id': 1192, 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'id': 1193, 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'id': 1194, 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'id': 1195, 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 
'name': 'wreath'}, {'frequency': 'c', 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'id': 1196, 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'f', 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'id': 1197, 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'id': 1198, 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'c', 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'id': 1199, 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'c', 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'id': 1200, 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'c', 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'id': 1201, 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'id': 1202, 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'id': 1203, 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa +# fmt: on diff --git a/src/sts/detectron2/data/datasets/pascal_voc.py b/src/sts/detectron2/data/datasets/pascal_voc.py new file mode 100644 index 0000000000000000000000000000000000000000..dbbf82cb96442bfa0cf05ed0f4dddf3645434b7e --- /dev/null +++ b/src/sts/detectron2/data/datasets/pascal_voc.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import numpy as np +import os +import xml.etree.ElementTree as ET +from typing import List, Tuple, Union + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from detectron2.utils.file_io import PathManager + +__all__ = ["load_voc_instances", "register_pascal_voc"] + + +# fmt: off +CLASS_NAMES = ( + "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", + "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", + "pottedplant", "sheep", "sofa", "train", "tvmonitor" +) +# fmt: on + + +def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): + """ + Load Pascal VOC detection annotations to Detectron2 format. + + Args: + dirname: Contain "Annotations", "ImageSets", "JPEGImages" + split (str): one of "train", "test", "val", "trainval" + class_names: list or tuple of class names + """ + with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: + fileids = np.loadtxt(f, dtype=np.str) + + # Needs to read many small annotation files. 
Makes sense at local + annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) + dicts = [] + for fileid in fileids: + anno_file = os.path.join(annotation_dirname, fileid + ".xml") + jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") + + with PathManager.open(anno_file) as f: + tree = ET.parse(f) + + r = { + "file_name": jpeg_file, + "image_id": fileid, + "height": int(tree.findall("./size/height")[0].text), + "width": int(tree.findall("./size/width")[0].text), + } + instances = [] + + for obj in tree.findall("object"): + cls = obj.find("name").text + # We include "difficult" samples in training. + # Based on limited experiments, they don't hurt accuracy. + # difficult = int(obj.find("difficult").text) + # if difficult == 1: + # continue + bbox = obj.find("bndbox") + bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] + # Original annotations are integers in the range [1, W or H] + # Assuming they mean 1-based pixel indices (inclusive), + # a box with annotation (xmin=1, xmax=W) covers the whole image. + # In coordinate space this is represented by (xmin=0, xmax=W) + bbox[0] -= 1.0 + bbox[1] -= 1.0 + instances.append( + {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} + ) + r["annotations"] = instances + dicts.append(r) + return dicts + + +def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): + DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) + MetadataCatalog.get(name).set( + thing_classes=list(class_names), dirname=dirname, year=year, split=split + ) diff --git a/src/sts/detectron2/data/datasets/register_coco.py b/src/sts/detectron2/data/datasets/register_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e564438d5bf016bcdbb65b4bbdc215d79f579f8a --- /dev/null +++ b/src/sts/detectron2/data/datasets/register_coco.py @@ -0,0 +1,3 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .coco import register_coco_instances # noqa +from .coco_panoptic import register_coco_panoptic_separated # noqa diff --git a/src/sts/detectron2/data/detection_utils.py b/src/sts/detectron2/data/detection_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b2b485ed937ea8ecc5c7efdf976a8d22306f5b57 --- /dev/null +++ b/src/sts/detectron2/data/detection_utils.py @@ -0,0 +1,605 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +Common data processing utilities that are used in a +typical object detection data pipeline. +""" +import logging +import numpy as np +import pycocotools.mask as mask_util +import torch +from PIL import Image + +from detectron2.structures import ( + BitMasks, + Boxes, + BoxMode, + Instances, + Keypoints, + PolygonMasks, + RotatedBoxes, + polygons_to_bitmask, +) +from detectron2.utils.file_io import PathManager + +from . import transforms as T +from .catalog import MetadataCatalog + +__all__ = [ + "SizeMismatchError", + "convert_image_to_rgb", + "check_image_size", + "transform_proposals", + "transform_instance_annotations", + "annotations_to_instances", + "annotations_to_instances_rotated", + "build_augmentation", + "build_transform_gen", + "create_keypoint_hflip_indices", + "filter_empty_instances", + "read_image", +] + + +class SizeMismatchError(ValueError): + """ + When loaded image has difference width/height compared with annotation. 
+ """ + + +# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] +_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. + + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + + +def convert_image_to_rgb(image, format): + """ + Convert an image from given format to RGB. + + Args: + image (np.ndarray or Tensor): an HWC image + format (str): the format of input image, also see `read_image` + + Returns: + (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8 + """ + if isinstance(image, torch.Tensor): + image = image.cpu().numpy() + if format == "BGR": + image = image[:, :, [2, 1, 0]] + elif format == "YUV-BT.601": + image = np.dot(image, np.array(_M_YUV2RGB).T) + image = image * 255.0 + else: + if format == "L": + image = image[:, :, 0] + image = image.astype(np.uint8) + image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) + return image + + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + try: + exif = image.getexif() + except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 + exif = None + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): + an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. 
+ """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + return convert_PIL_to_numpy(image, format) + + +def check_image_size(dataset_dict, image): + """ + Raise an error if the image does not match the size specified in the dict. + """ + if "width" in dataset_dict or "height" in dataset_dict: + image_wh = (image.shape[1], image.shape[0]) + expected_wh = (dataset_dict["width"], dataset_dict["height"]) + if not image_wh == expected_wh: + raise SizeMismatchError( + "Mismatched image shape{}, got {}, expect {}.".format( + " for image " + dataset_dict["file_name"] + if "file_name" in dataset_dict + else "", + image_wh, + expected_wh, + ) + + " Please check the width/height in your annotation." + ) + + # To ensure bbox always remap to original image size + if "width" not in dataset_dict: + dataset_dict["width"] = image.shape[1] + if "height" not in dataset_dict: + dataset_dict["height"] = image.shape[0] + + +def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0): + """ + Apply transformations to the proposals in dataset_dict, if any. + + Args: + dataset_dict (dict): a dict read from the dataset, possibly + contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" + image_shape (tuple): height, width + transforms (TransformList): + proposal_topk (int): only keep top-K scoring proposals + min_box_size (int): proposals with either side smaller than this + threshold are removed + + The input dict is modified in-place, with abovementioned keys removed. A new + key "proposals" will be added. Its value is an `Instances` + object which contains the transformed proposals in its field + "proposal_boxes" and "objectness_logits". + """ + if "proposal_boxes" in dataset_dict: + # Transform proposal boxes + boxes = transforms.apply_box( + BoxMode.convert( + dataset_dict.pop("proposal_boxes"), + dataset_dict.pop("proposal_bbox_mode"), + BoxMode.XYXY_ABS, + ) + ) + boxes = Boxes(boxes) + objectness_logits = torch.as_tensor( + dataset_dict.pop("proposal_objectness_logits").astype("float32") + ) + + boxes.clip(image_shape) + keep = boxes.nonempty(threshold=min_box_size) + boxes = boxes[keep] + objectness_logits = objectness_logits[keep] + + proposals = Instances(image_shape) + proposals.proposal_boxes = boxes[:proposal_topk] + proposals.objectness_logits = objectness_logits[:proposal_topk] + dataset_dict["proposals"] = proposals + + +def transform_instance_annotations( + annotation, transforms, image_size, *, keypoint_hflip_indices=None +): + """ + Apply transforms to box, segmentation and keypoints annotations of a single instance. + + It will use `transforms.apply_box` for the box, and + `transforms.apply_coords` for segmentation polygons & keypoints. + If you need anything more specially designed for each data structure, + you'll need to implement your own version of this function or the transforms. + + Args: + annotation (dict): dict of instance annotations for a single instance. + It will be modified in-place. + transforms (TransformList or list[Transform]): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + + Returns: + dict: + the same input dict with fields "bbox", "segmentation", "keypoints" + transformed according to `transforms`. + The "bbox_mode" field will be set to XYXY_ABS. 
+ """ + if isinstance(transforms, (tuple, list)): + transforms = T.TransformList(transforms) + # bbox is 1d (per-instance bounding box) + bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) + # clip transformed bbox to image size + bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0) + annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1]) + annotation["bbox_mode"] = BoxMode.XYXY_ABS + if "segmentation" in annotation: + # each instance contains 1 or more polygons + segm = annotation["segmentation"] + if isinstance(segm, list): + # polygons + polygons = [np.asarray(p).reshape(-1, 2) for p in segm] + annotation["segmentation"] = [ + p.reshape(-1) for p in transforms.apply_polygons(polygons) + ] + elif isinstance(segm, dict): + # RLE + mask = mask_util.decode(segm) + mask = transforms.apply_segmentation(mask) + assert tuple(mask.shape[:2]) == image_size + annotation["segmentation"] = mask + else: + raise ValueError( + "Cannot transform segmentation of type '{}'!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict.".format(type(segm)) + ) + if "keypoints" in annotation: + keypoints = transform_keypoint_annotations( + annotation["keypoints"], transforms, image_size, keypoint_hflip_indices + ) + annotation["keypoints"] = keypoints + return annotation + + +def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None): + """ + Transform keypoint annotations of an image. + If a keypoint is transformed out of image boundary, it will be marked "unlabeled" (visibility=0) + + Args: + keypoints (list[float]): Nx3 float in Detectron2's Dataset format. + Each point is represented by (x, y, visibility). + transforms (TransformList): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + When `transforms` includes horizontal flip, will use the index + mapping to flip keypoints. + """ + # (N*3,) -> (N, 3) + keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3) + keypoints_xy = transforms.apply_coords(keypoints[:, :2]) + + # Set all out-of-boundary points to "unlabeled" + inside = (keypoints_xy >= np.array([0, 0])) & (keypoints_xy <= np.array(image_size[::-1])) + inside = inside.all(axis=1) + keypoints[:, :2] = keypoints_xy + keypoints[:, 2][~inside] = 0 + + # This assumes that HorizFlipTransform is the only one that does flip + do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 + + # Alternative way: check if probe points was horizontally flipped. + # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) + # probe_aug = transforms.apply_coords(probe.copy()) + # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa + + # If flipped, swap each keypoint with its opposite-handed equivalent + if do_hflip: + assert keypoint_hflip_indices is not None + keypoints = keypoints[keypoint_hflip_indices, :] + + # Maintain COCO convention that if visibility == 0 (unlabeled), then x, y = 0 + keypoints[keypoints[:, 2] == 0] = 0 + return keypoints + + +def annotations_to_instances(annos, image_size, mask_format="polygon"): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. 
+ image_size (tuple): height, width + + Returns: + Instances: + It will contain fields "gt_boxes", "gt_classes", + "gt_masks", "gt_keypoints", if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] + target = Instances(image_size) + target.gt_boxes = Boxes(boxes) + boxes_feat = [BoxMode.convert(obj["bbox"]/4, obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] + target.gt_boxes_feat = Boxes(boxes_feat) + classes = [int(obj["category_id"]) for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + rec = [obj["rec"] for obj in annos] + rec = torch.tensor(rec, dtype=torch.int64) + target.rec = rec + + if len(annos) and "segmentation" in annos[0]: + segms = [obj["segmentation"] for obj in annos] + #segms_feat = [[obj["segmentation"][0]/4] for obj in annos] + if mask_format == "polygon": + try: + masks = PolygonMasks(segms) + #masks_feat = PolygonMasks(segms_feat) + except ValueError as e: + raise ValueError( + "Failed to use mask_format=='polygon' from the given annotations!" + ) from e + else: + assert mask_format == "bitmask", mask_format + masks = [] + for segm in segms: + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image_size)) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a binary segmentation mask " + " in a 2D numpy array of shape HxW.".format(type(segm)) + ) + # torch.from_numpy does not support array with negative stride. + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) + ) + target.gt_masks = masks + #target.gt_masks_feat = masks_feat + if len(annos) and "keypoints" in annos[0]: + kpts = [obj.get("keypoints", []) for obj in annos] + target.gt_keypoints = Keypoints(kpts) + + return target + + +def annotations_to_instances_rotated(annos, image_size): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + Compared to `annotations_to_instances`, this function is for rotated boxes only + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. + image_size (tuple): height, width + + Returns: + Instances: + Containing fields "gt_boxes", "gt_classes", + if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = [obj["bbox"] for obj in annos] + target = Instances(image_size) + boxes = target.gt_boxes = RotatedBoxes(boxes) + boxes.clip(image_size) + + classes = [obj["category_id"] for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + return target + + +def filter_empty_instances(instances, by_box=True, by_mask=True, box_threshold=1e-5): + """ + Filter out empty instances in an `Instances` object. 
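+    "Empty" means a ground-truth box whose width or height is not larger than
+    ``box_threshold``, and/or a ground-truth mask with no foreground pixels; the
+    remaining instances are returned as a boolean-indexed subset of the input.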
+ + Args: + instances (Instances): + by_box (bool): whether to filter out instances with empty boxes + by_mask (bool): whether to filter out instances with empty masks + box_threshold (float): minimum width and height to be considered non-empty + + Returns: + Instances: the filtered instances. + """ + assert by_box or by_mask + r = [] + if by_box: + r.append(instances.gt_boxes.nonempty(threshold=box_threshold)) + if instances.has("gt_masks") and by_mask: + r.append(instances.gt_masks.nonempty()) + + # TODO: can also filter visible keypoints + + if not r: + return instances + m = r[0] + for x in r[1:]: + m = m & x + return instances[m] + + +def create_keypoint_hflip_indices(dataset_names): + """ + Args: + dataset_names (list[str]): list of dataset names + Returns: + ndarray[int]: a vector of size=#keypoints, storing the + horizontally-flipped keypoint indices. + """ + + check_metadata_consistency("keypoint_names", dataset_names) + check_metadata_consistency("keypoint_flip_map", dataset_names) + + meta = MetadataCatalog.get(dataset_names[0]) + names = meta.keypoint_names + # TODO flip -> hflip + flip_map = dict(meta.keypoint_flip_map) + flip_map.update({v: k for k, v in flip_map.items()}) + flipped_names = [i if i not in flip_map else flip_map[i] for i in names] + flip_indices = [names.index(i) for i in flipped_names] + return np.asarray(flip_indices, dtype=np.int32) + + +def gen_crop_transform_with_instance(crop_size, image_size, instance): + """ + Generate a CropTransform so that the cropping region contains + the center of the given instance. + + Args: + crop_size (tuple): h, w in pixels + image_size (tuple): h, w + instance (dict): an annotation dict of one instance, in Detectron2's + dataset format. + """ + crop_size = np.asarray(crop_size, dtype=np.int32) + bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) + center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 + assert ( + image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] + ), "The annotation bounding box is outside of the image!" + assert ( + image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] + ), "Crop size is larger than image size!" + + min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) + max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) + max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) + + y0 = np.random.randint(min_yx[0], max_yx[0] + 1) + x0 = np.random.randint(min_yx[1], max_yx[1] + 1) + return T.CropTransform(x0, y0, crop_size[1], crop_size[0]) + + +def check_metadata_consistency(key, dataset_names): + """ + Check that the datasets have consistent metadata. 
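+    The value stored under ``key`` is read from ``MetadataCatalog`` for every dataset
+    name and must be identical across all of them; any mismatch is logged and raised
+    as a ``ValueError``.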
+ + Args: + key (str): a metadata key + dataset_names (list[str]): a list of dataset names + + Raises: + AttributeError: if the key does not exist in the metadata + ValueError: if the given datasets do not have the same metadata values defined by key + """ + if len(dataset_names) == 0: + return + logger = logging.getLogger(__name__) + entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names] + for idx, entry in enumerate(entries_per_dataset): + if entry != entries_per_dataset[0]: + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry)) + ) + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format( + key, dataset_names[0], str(entries_per_dataset[0]) + ) + ) + raise ValueError("Datasets have different metadata '{}'!".format(key)) + + +def build_augmentation(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + + Returns: + list[Augmentation] + """ + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + sample_style = "choice" + augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)] + if is_train and cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + return augmentation + + +build_transform_gen = build_augmentation +""" +Alias for backward-compatibility. +""" diff --git a/src/sts/detectron2/data/samplers/__init__.py b/src/sts/detectron2/data/samplers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4bacd895756cedbc9b37fe24af6dbcd8a054246b --- /dev/null +++ b/src/sts/detectron2/data/samplers/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler +from .grouped_batch_sampler import GroupedBatchSampler + +__all__ = [ + "GroupedBatchSampler", + "TrainingSampler", + "InferenceSampler", + "RepeatFactorTrainingSampler", +] diff --git a/src/sts/detectron2/data/samplers/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/data/samplers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54964c74e13d0e76b564c614cfa2d7e5a5c08431 Binary files /dev/null and b/src/sts/detectron2/data/samplers/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/samplers/__pycache__/distributed_sampler.cpython-38.pyc b/src/sts/detectron2/data/samplers/__pycache__/distributed_sampler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8890af4c959216a63b6554d2893ca90887df6fef Binary files /dev/null and b/src/sts/detectron2/data/samplers/__pycache__/distributed_sampler.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/samplers/__pycache__/grouped_batch_sampler.cpython-38.pyc b/src/sts/detectron2/data/samplers/__pycache__/grouped_batch_sampler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2313b3d29b24a10f402b350b1601661753aa9a19 Binary files /dev/null and b/src/sts/detectron2/data/samplers/__pycache__/grouped_batch_sampler.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/samplers/distributed_sampler.py b/src/sts/detectron2/data/samplers/distributed_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..f0e8da28822ec071a04caaac2069e6148bc90622 --- /dev/null +++ b/src/sts/detectron2/data/samplers/distributed_sampler.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import math +from collections import defaultdict +from typing import Optional +import torch +from torch.utils.data.sampler import Sampler + +from detectron2.utils import comm + + +class TrainingSampler(Sampler): + """ + In training, we only care about the "infinite stream" of training data. + So this sampler produces an infinite stream of indices and + all workers cooperate to correctly shuffle the indices and sample different indices. + + The samplers in each worker effectively produces `indices[worker_id::num_workers]` + where `indices` is an infinite stream of indices consisting of + `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) + or `range(size) + range(size) + ...` (if shuffle is False) + """ + + def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). 
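+
+        Example (an illustrative sketch, not from the original docs; ``dataset`` is
+        assumed to be any map-style dataset)::
+
+            sampler = TrainingSampler(len(dataset), shuffle=True, seed=42)
+            # the sampler yields an infinite index stream, so this loader never stops on its own
+            loader = torch.utils.data.DataLoader(dataset, batch_size=2, sampler=sampler)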
+ """ + self._size = size + assert size > 0 + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + if self._shuffle: + yield from torch.randperm(self._size, generator=g).tolist() + else: + yield from torch.arange(self._size).tolist() + + +class RepeatFactorTrainingSampler(Sampler): + """ + Similar to TrainingSampler, but a sample may appear more times than others based + on its "repeat factor". This is suitable for training on class imbalanced datasets like LVIS. + """ + + def __init__(self, repeat_factors, *, shuffle=True, seed=None): + """ + Args: + repeat_factors (Tensor): a float vector, the repeat factor for each indice. When it's + full of ones, it is equivalent to ``TrainingSampler(len(repeat_factors), ...)``. + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + """ + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + # Split into whole number (_int_part) and fractional (_frac_part) parts. + self._int_part = torch.trunc(repeat_factors) + self._frac_part = repeat_factors - self._int_part + + @staticmethod + def repeat_factors_from_category_frequency(dataset_dicts, repeat_thresh): + """ + Compute (fractional) per-image repeat factors based on category frequency. + The repeat factor for an image is a function of the frequency of the rarest + category labeled in that image. The "frequency of category c" in [0, 1] is defined + as the fraction of images in the training set (without repeats) in which category c + appears. + See :paper:`lvis` (>= v2) Appendix B.2. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 dataset format. + repeat_thresh (float): frequency threshold below which data is repeated. + If the frequency is half of `repeat_thresh`, the image will be + repeated twice. + + Returns: + torch.Tensor: + the i-th element is the repeat factor for the dataset image at index i. + """ + # 1. For each category c, compute the fraction of images that contain it: f(c) + category_freq = defaultdict(int) + for dataset_dict in dataset_dicts: # For each image (without repeats) + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + for cat_id in cat_ids: + category_freq[cat_id] += 1 + num_images = len(dataset_dicts) + for k, v in category_freq.items(): + category_freq[k] = v / num_images + + # 2. For each category c, compute the category-level repeat factor: + # r(c) = max(1, sqrt(t / f(c))) + category_rep = { + cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) + for cat_id, cat_freq in category_freq.items() + } + + # 3. 
For each image I, compute the image-level repeat factor: + # r(I) = max_{c in I} r(c) + rep_factors = [] + for dataset_dict in dataset_dicts: + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}, default=1.0) + rep_factors.append(rep_factor) + + return torch.tensor(rep_factors, dtype=torch.float32) + + def _get_epoch_indices(self, generator): + """ + Create a list of dataset indices (with repeats) to use for one epoch. + + Args: + generator (torch.Generator): pseudo random number generator used for + stochastic rounding. + + Returns: + torch.Tensor: list of dataset indices to use in one epoch. Each index + is repeated based on its calculated repeat factor. + """ + # Since repeat factors are fractional, we use stochastic rounding so + # that the target repeat factor is achieved in expectation over the + # course of training + rands = torch.rand(len(self._frac_part), generator=generator) + rep_factors = self._int_part + (rands < self._frac_part).float() + # Construct a list of indices in which we repeat images as specified + indices = [] + for dataset_index, rep_factor in enumerate(rep_factors): + indices.extend([dataset_index] * int(rep_factor.item())) + return torch.tensor(indices, dtype=torch.int64) + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + # Sample indices with repeats determined by stochastic rounding; each + # "epoch" may have a slightly different size due to the rounding. + indices = self._get_epoch_indices(g) + if self._shuffle: + randperm = torch.randperm(len(indices), generator=g) + yield from indices[randperm].tolist() + else: + yield from indices.tolist() + + +class InferenceSampler(Sampler): + """ + Produce indices for inference across all workers. + Inference needs to run on the __exact__ set of samples, + therefore when the total number of samples is not divisible by the number of workers, + this sampler produces different number of samples on different workers. + """ + + def __init__(self, size: int): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + """ + self._size = size + assert size > 0 + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + shard_size = (self._size - 1) // self._world_size + 1 + begin = shard_size * self._rank + end = min(shard_size * (self._rank + 1), self._size) + self._local_indices = range(begin, end) + + def __iter__(self): + yield from self._local_indices + + def __len__(self): + return len(self._local_indices) diff --git a/src/sts/detectron2/data/samplers/grouped_batch_sampler.py b/src/sts/detectron2/data/samplers/grouped_batch_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..5b247730aacd04dd0c752664acde3257c4eddd71 --- /dev/null +++ b/src/sts/detectron2/data/samplers/grouped_batch_sampler.py @@ -0,0 +1,47 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from torch.utils.data.sampler import BatchSampler, Sampler + + +class GroupedBatchSampler(BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + It enforces that the batch only contain elements from the same group. + It also tries to provide mini-batches which follows an ordering which is + as close as possible to the ordering from the original sampler. 
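+
+    Example (an illustrative sketch; the group ids below are an assumed toy grouping,
+    e.g. 0 = landscape images and 1 = portrait images)::
+
+        sampler = TrainingSampler(4, shuffle=False)
+        batch_sampler = GroupedBatchSampler(sampler, group_ids=[0, 1, 0, 1], batch_size=2)
+        # in a single-process run this yields batches such as [0, 2] and then [1, 3]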
+ """ + + def __init__(self, sampler, group_ids, batch_size): + """ + Args: + sampler (Sampler): Base sampler. + group_ids (list[int]): If the sampler produces indices in range [0, N), + `group_ids` must be a list of `N` ints which contains the group id of each sample. + The group ids must be a set of integers in the range [0, num_groups). + batch_size (int): Size of mini-batch. + """ + if not isinstance(sampler, Sampler): + raise ValueError( + "sampler should be an instance of " + "torch.utils.data.Sampler, but got sampler={}".format(sampler) + ) + self.sampler = sampler + self.group_ids = np.asarray(group_ids) + assert self.group_ids.ndim == 1 + self.batch_size = batch_size + groups = np.unique(self.group_ids).tolist() + + # buffer the indices of each group until batch size is reached + self.buffer_per_group = {k: [] for k in groups} + + def __iter__(self): + for idx in self.sampler: + group_id = self.group_ids[idx] + group_buffer = self.buffer_per_group[group_id] + group_buffer.append(idx) + if len(group_buffer) == self.batch_size: + yield group_buffer[:] # yield a copy of the list + del group_buffer[:] + + def __len__(self): + raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") diff --git a/src/sts/detectron2/data/transforms/__init__.py b/src/sts/detectron2/data/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ab3c63b5b456a7fb878757e25768a3634f76ae5b --- /dev/null +++ b/src/sts/detectron2/data/transforms/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from fvcore.transforms.transform import Transform, TransformList # order them first +from fvcore.transforms.transform import * +from .transform import * +from .augmentation import * +from .augmentation_impl import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/detectron2/data/transforms/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/data/transforms/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eb2c67d4adf4ca0babc3398ed73c09e48adfb93b Binary files /dev/null and b/src/sts/detectron2/data/transforms/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/transforms/__pycache__/augmentation.cpython-38.pyc b/src/sts/detectron2/data/transforms/__pycache__/augmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84e1b196a0c3fab5fb1a5fb036e2c6f7beea435c Binary files /dev/null and b/src/sts/detectron2/data/transforms/__pycache__/augmentation.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/transforms/__pycache__/augmentation_impl.cpython-38.pyc b/src/sts/detectron2/data/transforms/__pycache__/augmentation_impl.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99bc4e8df2c2980c3ddf70755393ad09f336465c Binary files /dev/null and b/src/sts/detectron2/data/transforms/__pycache__/augmentation_impl.cpython-38.pyc differ diff --git a/src/sts/detectron2/data/transforms/__pycache__/transform.cpython-38.pyc b/src/sts/detectron2/data/transforms/__pycache__/transform.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..18466569160d6bb5567f3bc815c1821f099bd5e8 Binary files /dev/null and b/src/sts/detectron2/data/transforms/__pycache__/transform.cpython-38.pyc differ diff 
--git a/src/sts/detectron2/data/transforms/augmentation.py b/src/sts/detectron2/data/transforms/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..917290bf634b4bbd21fd70fbc14dabd6705fea33 --- /dev/null +++ b/src/sts/detectron2/data/transforms/augmentation.py @@ -0,0 +1,377 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import inspect +import numpy as np +import pprint +from typing import Any, List, Optional, Tuple, Union +from fvcore.transforms.transform import Transform, TransformList + +""" +See "Data Augmentation" tutorial for an overview of the system: +https://detectron2.readthedocs.io/tutorials/augmentation.html +""" + + +__all__ = [ + "Augmentation", + "AugmentationList", + "AugInput", + "TransformGen", + "apply_transform_gens", + "StandardAugInput", + "apply_augmentations", +] + + +def _check_img_dtype(img): + assert isinstance(img, np.ndarray), "[Augmentation] Needs an numpy array, but got a {}!".format( + type(img) + ) + assert not isinstance(img.dtype, np.integer) or ( + img.dtype == np.uint8 + ), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format( + img.dtype + ) + assert img.ndim in [2, 3], img.ndim + + +def _get_aug_input_args(aug, aug_input) -> List[Any]: + """ + Get the arguments to be passed to ``aug.get_transform`` from the input ``aug_input``. + """ + if aug.input_args is None: + # Decide what attributes are needed automatically + prms = list(inspect.signature(aug.get_transform).parameters.items()) + # The default behavior is: if there is one parameter, then its "image" + # (work automatically for majority of use cases, and also avoid BC breaking), + # Otherwise, use the argument names. + if len(prms) == 1: + names = ("image",) + else: + names = [] + for name, prm in prms: + if prm.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + raise TypeError( + f""" \ +The default implementation of `{type(aug)}.__call__` does not allow \ +`{type(aug)}.get_transform` to use variable-length arguments (*args, **kwargs)! \ +If arguments are unknown, reimplement `__call__` instead. \ +""" + ) + names.append(name) + aug.input_args = tuple(names) + + args = [] + for f in aug.input_args: + try: + args.append(getattr(aug_input, f)) + except AttributeError as e: + raise AttributeError( + f"{type(aug)}.get_transform needs input attribute '{f}', " + f"but it is not an attribute of {type(aug_input)}!" + ) from e + return args + + +class Augmentation: + """ + Augmentation defines (often random) policies/strategies to generate :class:`Transform` + from data. It is often used for pre-processing of input data. + + A "policy" that generates a :class:`Transform` may, in the most general case, + need arbitrary information from input data in order to determine what transforms + to apply. Therefore, each :class:`Augmentation` instance defines the arguments + needed by its :meth:`get_transform` method. When called with the positional arguments, + the :meth:`get_transform` method executes the policy. + + Note that :class:`Augmentation` defines the policies to create a :class:`Transform`, + but not how to execute the actual transform operations to those data. + Its :meth:`__call__` method will use :meth:`AugInput.transform` to execute the transform. + + The returned `Transform` object is meant to describe deterministic transformation, which means + it can be re-applied on associated data, e.g. the geometry of an image and its segmentation + masks need to be transformed together. 
+ (If such re-application is not needed, then determinism is not a crucial requirement.) + """ + + input_args: Optional[Tuple[str]] = None + """ + Stores the attribute names needed by :meth:`get_transform`, e.g. ``("image", "sem_seg")``. + By default, it is just a tuple of argument names in :meth:`self.get_transform`, which often only + contain "image". As long as the argument name convention is followed, there is no need for + users to touch this attribute. + """ + + def _init(self, params=None): + if params: + for k, v in params.items(): + if k != "self" and not k.startswith("_"): + setattr(self, k, v) + + def get_transform(self, *args) -> Transform: + """ + Execute the policy based on input data, and decide what transform to apply to inputs. + + Args: + args: Any fixed-length positional arguments. By default, the name of the arguments + should exist in the :class:`AugInput` to be used. + + Returns: + Transform: Returns the deterministic transform to apply to the input. + + Examples: + :: + class MyAug: + # if a policy needs to know both image and semantic segmentation + def get_transform(image, sem_seg) -> T.Transform: + pass + tfm: Transform = MyAug().get_transform(image, sem_seg) + new_image = tfm.apply_image(image) + + Notes: + Users can freely use arbitrary new argument names in custom + :meth:`get_transform` method, as long as they are available in the + input data. In detectron2 we use the following convention: + + * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. + * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes + of N instances. Each is in XYXY format in unit of absolute coordinates. + * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel. + + We do not specify convention for other types and do not include builtin + :class:`Augmentation` that uses other types in detectron2. + """ + raise NotImplementedError + + def __call__(self, aug_input) -> Transform: + """ + Augment the given `aug_input` **in-place**, and return the transform that's used. + + This method will be called to apply the augmentation. In most augmentation, it + is enough to use the default implementation, which calls :meth:`get_transform` + using the inputs. But a subclass can overwrite it to have more complicated logic. + + Args: + aug_input (AugInput): an object that has attributes needed by this augmentation + (defined by ``self.get_transform``). Its ``transform`` method will be called + to in-place transform it. + + Returns: + Transform: the transform that is applied on the input. + """ + args = _get_aug_input_args(self, aug_input) + tfm = self.get_transform(*args) + assert isinstance(tfm, (Transform, TransformList)), ( + f"{type(self)}.get_transform must return an instance of Transform! " + "Got {type(tfm)} instead." + ) + aug_input.transform(tfm) + return tfm + + def _rand_range(self, low=1.0, high=None, size=None): + """ + Uniform float random number between low and high. 
+ """ + if high is None: + low, high = 0, low + if size is None: + size = [] + return np.random.uniform(low, high, size) + + def __repr__(self): + """ + Produce something like: + "MyAugmentation(field1={self.field1}, field2={self.field2})" + """ + try: + sig = inspect.signature(self.__init__) + classname = type(self).__name__ + argstr = [] + for name, param in sig.parameters.items(): + assert ( + param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD + ), "The default __repr__ doesn't support *args or **kwargs" + assert hasattr(self, name), ( + "Attribute {} not found! " + "Default __repr__ only works if attributes match the constructor.".format(name) + ) + attr = getattr(self, name) + default = param.default + if default is attr: + continue + attr_str = pprint.pformat(attr) + if "\n" in attr_str: + # don't show it if pformat decides to use >1 lines + attr_str = "..." + argstr.append("{}={}".format(name, attr_str)) + return "{}({})".format(classname, ", ".join(argstr)) + except AssertionError: + return super().__repr__() + + __str__ = __repr__ + + +def _transform_to_aug(tfm_or_aug): + """ + Wrap Transform into Augmentation. + Private, used internally to implement augmentations. + """ + assert isinstance(tfm_or_aug, (Transform, Augmentation)), tfm_or_aug + if isinstance(tfm_or_aug, Augmentation): + return tfm_or_aug + else: + + class _TransformToAug(Augmentation): + def __init__(self, tfm: Transform): + self.tfm = tfm + + def get_transform(self, *args): + return self.tfm + + def __repr__(self): + return repr(self.tfm) + + __str__ = __repr__ + + return _TransformToAug(tfm_or_aug) + + +class AugmentationList(Augmentation): + """ + Apply a sequence of augmentations. + + It has ``__call__`` method to apply the augmentations. + + Note that :meth:`get_transform` method is impossible (will throw error if called) + for :class:`AugmentationList`, because in order to apply a sequence of augmentations, + the kth augmentation must be applied first, to provide inputs needed by the (k+1)th + augmentation. + """ + + def __init__(self, augs): + """ + Args: + augs (list[Augmentation or Transform]): + """ + super().__init__() + self.augs = [_transform_to_aug(x) for x in augs] + + def __call__(self, aug_input) -> Transform: + tfms = [] + for x in self.augs: + tfm = x(aug_input) + tfms.append(tfm) + return TransformList(tfms) + + def __repr__(self): + msgs = [str(x) for x in self.augs] + return "AugmentationList[{}]".format(", ".join(msgs)) + + __str__ = __repr__ + + +class AugInput: + """ + Input that can be used with :meth:`Augmentation.__call__`. + This is a standard implementation for the majority of use cases. + This class provides the standard attributes **"image", "boxes", "sem_seg"** + defined in :meth:`__init__` and they may be needed by different augmentations. + Most augmentation policies do not need attributes beyond these three. + + After applying augmentations to these attributes (using :meth:`AugInput.transform`), + the returned transforms can then be used to transform other data structures that users have. + + Examples: + :: + input = AugInput(image, boxes=boxes) + tfms = augmentation(input) + transformed_image = input.image + transformed_boxes = input.boxes + transformed_other_data = tfms.apply_other(other_data) + + An extended project that works with new data types may implement augmentation policies + that need other inputs. An algorithm may need to transform inputs in a way different + from the standard approach defined in this class. 
In those rare situations, users can + implement a class similar to this class, that satify the following condition: + + * The input must provide access to these data in the form of attribute access + (``getattr``). For example, if an :class:`Augmentation` to be applied needs "image" + and "sem_seg" arguments, its input must have the attribute "image" and "sem_seg". + * The input must have a ``transform(tfm: Transform) -> None`` method which + in-place transforms all its attributes. + """ + + # TODO maybe should support more builtin data types here + def __init__( + self, + image: np.ndarray, + *, + boxes: Optional[np.ndarray] = None, + sem_seg: Optional[np.ndarray] = None, + ): + """ + Args: + image (ndarray): (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. The meaning of C is up + to users. + boxes (ndarray or None): Nx4 float32 boxes in XYXY_ABS mode + sem_seg (ndarray or None): HxW uint8 semantic segmentation mask. Each element + is an integer label of pixel. + """ + _check_img_dtype(image) + self.image = image + self.boxes = boxes + self.sem_seg = sem_seg + + def transform(self, tfm: Transform) -> None: + """ + In-place transform all attributes of this class. + + By "in-place", it means after calling this method, accessing an attribute such + as ``self.image`` will return transformed data. + """ + self.image = tfm.apply_image(self.image) + if self.boxes is not None: + self.boxes = tfm.apply_box(self.boxes) + if self.sem_seg is not None: + self.sem_seg = tfm.apply_segmentation(self.sem_seg) + + def apply_augmentations( + self, augmentations: List[Union[Augmentation, Transform]] + ) -> TransformList: + """ + Equivalent of ``AugmentationList(augmentations)(self)`` + """ + return AugmentationList(augmentations)(self) + + +def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs): + """ + Use ``T.AugmentationList(augmentations)(inputs)`` instead. + """ + if isinstance(inputs, np.ndarray): + # handle the common case of image-only Augmentation, also for backward compatibility + image_only = True + inputs = AugInput(inputs) + else: + image_only = False + tfms = inputs.apply_augmentations(augmentations) + return inputs.image if image_only else inputs, tfms + + +apply_transform_gens = apply_augmentations +""" +Alias for backward-compatibility. +""" + +TransformGen = Augmentation +""" +Alias for Augmentation, since it is something that generates :class:`Transform`s +""" + +StandardAugInput = AugInput +""" +Alias for compatibility. It's not worth the complexity to have two classes. +""" diff --git a/src/sts/detectron2/data/transforms/augmentation_impl.py b/src/sts/detectron2/data/transforms/augmentation_impl.py new file mode 100644 index 0000000000000000000000000000000000000000..2d04b2d16a41873e57f809d0aa57cea39c1432c9 --- /dev/null +++ b/src/sts/detectron2/data/transforms/augmentation_impl.py @@ -0,0 +1,579 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Implement many useful :class:`Augmentation`. 
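+
+A typical (illustrative) pipeline chains several of them together, for example::
+
+    from detectron2.data import transforms as T
+    augs = T.AugmentationList([T.RandomBrightness(0.9, 1.1), T.RandomFlip(prob=0.5)])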
+""" +import numpy as np +import sys +from fvcore.transforms.transform import ( + BlendTransform, + CropTransform, + HFlipTransform, + NoOpTransform, + VFlipTransform, +) +from PIL import Image +import random +from .augmentation import Augmentation, _transform_to_aug +from .transform import ExtentTransform, ResizeTransform, RotationTransform + +__all__ = [ + "RandomApply", + "RandomBrightness", + "RandomContrast", + "RandomCrop", + "RandomExtent", + "RandomFlip", + "RandomSaturation", + "RandomLighting", + "RandomRotation", + "Resize", + "ResizeShortestEdge", + "RandomCrop_CategoryAreaConstraint", + "RandomCropWithInstance", +] + + +class RandomApply(Augmentation): + """ + Randomly apply an augmentation with a given probability. + """ + + def __init__(self, tfm_or_aug, prob=0.5): + """ + Args: + tfm_or_aug (Transform, Augmentation): the transform or augmentation + to be applied. It can either be a `Transform` or `Augmentation` + instance. + prob (float): probability between 0.0 and 1.0 that + the wrapper transformation is applied + """ + super().__init__() + self.aug = _transform_to_aug(tfm_or_aug) + assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" + self.prob = prob + + def get_transform(self, *args): + do = self._rand_range() < self.prob + if do: + return self.aug.get_transform(*args) + else: + return NoOpTransform() + + def __call__(self, aug_input): + do = self._rand_range() < self.prob + if do: + return self.aug(aug_input) + else: + return NoOpTransform() + + +class RandomFlip(Augmentation): + """ + Flip the image horizontally or vertically with the given probability. + """ + + def __init__(self, prob=0.5, *, horizontal=True, vertical=False): + """ + Args: + prob (float): probability of flip. + horizontal (boolean): whether to apply horizontal flipping + vertical (boolean): whether to apply vertical flipping + """ + super().__init__() + + if horizontal and vertical: + raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") + if not horizontal and not vertical: + raise ValueError("At least one of horiz or vert has to be True!") + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + do = self._rand_range() < self.prob + if do: + if self.horizontal: + return HFlipTransform(w) + elif self.vertical: + return VFlipTransform(h) + else: + return NoOpTransform() + + +class Resize(Augmentation): + """ Resize image to a fixed target size""" + + def __init__(self, shape, interp=Image.BILINEAR): + """ + Args: + shape: (h, w) tuple or a int + interp: PIL interpolation method + """ + if isinstance(shape, int): + shape = (shape, shape) + shape = tuple(shape) + self._init(locals()) + + def get_transform(self, image): + return ResizeTransform( + image.shape[0], image.shape[1], self.shape[0], self.shape[1], self.interp + ) + + +class ResizeShortestEdge(Augmentation): + """ + Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. + If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. + """ + + def __init__( + self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR + ): + """ + Args: + short_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. + If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. + max_size (int): maximum allowed longest edge length. + sample_style (str): either "range" or "choice". 
+ """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + self.is_range = sample_style == "range" + if isinstance(short_edge_length, int): + short_edge_length = (short_edge_length, short_edge_length) + if self.is_range: + assert len(short_edge_length) == 2, ( + "short_edge_length must be two values using 'range' sample style." + f" Got {short_edge_length}!" + ) + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + if self.is_range: + size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) + else: + size = np.random.choice(self.short_edge_length) + if size == 0: + return NoOpTransform() + scale = size * 1.0 / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + if max(newh, neww) > self.max_size: + scale = self.max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + return ResizeTransform(h, w, newh, neww, self.interp) + + +class RandomRotation(Augmentation): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around the given center. + """ + + def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None): + """ + Args: + angle (list[float]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the angle (in degrees). + If ``sample_style=="choice"``, a list of angles to sample from + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (list[[float, float]]): If ``sample_style=="range"``, + a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, + [0, 0] being the top left of the image and [1, 1] the bottom right. + If ``sample_style=="choice"``, a list of centers to sample from + Default: None, which means that the center of rotation is the center of the image + center has no effect if expand=True because it only affects shifting + """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + self.is_range = sample_style == "range" + if isinstance(angle, (float, int)): + angle = (angle, angle) + if center is not None and isinstance(center[0], (float, int)): + center = (center, center) + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + center = None + if self.is_range: + angle = np.random.uniform(self.angle[0], self.angle[1]) + if self.center is not None: + center = ( + np.random.uniform(self.center[0][0], self.center[1][0]), + np.random.uniform(self.center[0][1], self.center[1][1]), + ) + else: + angle = np.random.choice(self.angle) + if self.center is not None: + center = np.random.choice(self.center) + + if center is not None: + center = (w * center[0], h * center[1]) # Convert to absolute coordinates + + if angle % 360 == 0: + return NoOpTransform() + + return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) + + +class RandomCrop(Augmentation): + """ + Randomly crop a rectangle region out of an image. + """ + + def __init__(self, crop_type: str, crop_size): + """ + Args: + crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range". + crop_size (tuple[float, float]): two floats, explained below. + + - "relative": crop a (H * crop_size[0], W * crop_size[1]) region from an input image of + size (H, W). 
crop size should be in (0, 1] + - "relative_range": uniformly sample two values from [crop_size[0], 1] + and [crop_size[1]], 1], and use them as in "relative" crop type. + - "absolute" crop a (crop_size[0], crop_size[1]) region from input image. + crop_size must be smaller than the input image size. + - "absolute_range", for an input of size (H, W), uniformly sample H_crop in + [crop_size[0], min(H, crop_size[1])] and W_crop in [crop_size[0], min(W, crop_size[1])]. + Then crop a region (H_crop, W_crop). + """ + # TODO style of relative_range and absolute_range are not consistent: + # one takes (h, w) but another takes (min, max) + super().__init__() + assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"] + self._init(locals()) + + def get_transform(self, image): + h, w = image.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) + h0 = np.random.randint(h - croph + 1) + w0 = np.random.randint(w - cropw + 1) + return CropTransform(w0, h0, cropw, croph) + + def get_crop_size(self, image_size): + """ + Args: + image_size (tuple): height, width + + Returns: + crop_size (tuple): height, width in absolute pixels + """ + h, w = image_size + if self.crop_type == "relative": + ch, cw = self.crop_size + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "relative_range": + crop_size = np.asarray(self.crop_size, dtype=np.float32) + ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "absolute": + return (min(self.crop_size[0], h), min(self.crop_size[1], w)) + elif self.crop_type == "absolute_range": + assert self.crop_size[0] <= self.crop_size[1] + ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1) + cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1) + return ch, cw + else: + NotImplementedError("Unknown crop type {}".format(self.crop_type)) + + +class RandomCrop_CategoryAreaConstraint(Augmentation): + """ + Similar to :class:`RandomCrop`, but find a cropping window such that no single category + occupies a ratio of more than `single_category_max_area` in semantic segmentation ground + truth, which can cause unstability in training. The function attempts to find such a valid + cropping window for at most 10 times. + """ + + def __init__( + self, + crop_type: str, + crop_size, + single_category_max_area: float = 1.0, + ignored_category: int = None, + ): + """ + Args: + crop_type, crop_size: same as in :class:`RandomCrop` + single_category_max_area: the maximum allowed area ratio of a + category. Set to 1.0 to disable + ignored_category: allow this category in the semantic segmentation + ground truth to exceed the area ratio. Usually set to the category + that's ignored in training. 
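+
+        Example (an illustrative sketch; 255 is an assumed "ignore" label in the
+        semantic segmentation ground truth)::
+
+            aug = RandomCrop_CategoryAreaConstraint(
+                "absolute", (512, 512), single_category_max_area=0.9, ignored_category=255
+            )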
+ """ + self.crop_aug = RandomCrop(crop_type, crop_size) + self._init(locals()) + + def get_transform(self, image, sem_seg): + if self.single_category_max_area >= 1.0: + return self.crop_aug.get_transform(image) + else: + h, w = sem_seg.shape + for _ in range(10): + crop_size = self.crop_aug.get_crop_size((h, w)) + y0 = np.random.randint(h - crop_size[0] + 1) + x0 = np.random.randint(w - crop_size[1] + 1) + sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]] + labels, cnt = np.unique(sem_seg_temp, return_counts=True) + if self.ignored_category is not None: + cnt = cnt[labels != self.ignored_category] + if len(cnt) > 1 and np.max(cnt) < np.sum(cnt) * self.single_category_max_area: + break + crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0]) + return crop_tfm + + +class RandomExtent(Augmentation): + """ + Outputs an image by cropping a random "subrect" of the source image. + + The subrect can be parameterized to include pixels outside the source image, + in which case they will be set to zeros (i.e. black). The size of the output + image will vary with the size of the random subrect. + """ + + def __init__(self, scale_range, shift_range): + """ + Args: + output_size (h, w): Dimensions of output image + scale_range (l, h): Range of input-to-output size scaling factor + shift_range (x, y): Range of shifts of the cropped subrect. The rect + is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], + where (w, h) is the (width, height) of the input image. Set each + component to zero to crop at the image's center. + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + img_h, img_w = image.shape[:2] + + # Initialize src_rect to fit the input image. + src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) + + # Apply a random scaling to the src_rect. + src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) + + # Apply a random shift to the coordinates origin. + src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) + src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) + + # Map src_rect coordinates into image coordinates (center at corner). + src_rect[0::2] += 0.5 * img_w + src_rect[1::2] += 0.5 * img_h + + return ExtentTransform( + src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), + output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])), + ) + + +class RandomContrast(Augmentation): + """ + Randomly transforms image contrast. + + Contrast intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce contrast + - intensity = 1 will preserve the input image + - intensity > 1 will increase contrast + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=image.mean(), src_weight=1 - w, dst_weight=w) + + +class RandomBrightness(Augmentation): + """ + Randomly transforms image brightness. + + Brightness intensity is uniformly sampled in (intensity_min, intensity_max). 
+ - intensity < 1 will reduce brightness + - intensity = 1 will preserve the input image + - intensity > 1 will increase brightness + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) + + +class RandomSaturation(Augmentation): + """ + Randomly transforms saturation of an RGB image. + Input images are assumed to have 'RGB' channel order. + + Saturation intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce saturation (make the image more grayscale) + - intensity = 1 will preserve the input image + - intensity > 1 will increase saturation + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation (1 preserves input). + intensity_max (float): Maximum augmentation (1 preserves input). + """ + super().__init__() + self._init(locals()) + + def get_transform(self, image): + assert image.shape[-1] == 3, "RandomSaturation only works on RGB images" + w = np.random.uniform(self.intensity_min, self.intensity_max) + grayscale = image.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] + return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) + + +class RandomLighting(Augmentation): + """ + The "lighting" augmentation described in AlexNet, using fixed PCA over ImageNet. + Input images are assumed to have 'RGB' channel order. + + The degree of color jittering is randomly sampled via a normal distribution, + with standard deviation given by the scale parameter. + """ + + def __init__(self, scale): + """ + Args: + scale (float): Standard deviation of principal component weighting. + """ + super().__init__() + self._init(locals()) + self.eigen_vecs = np.array( + [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]] + ) + self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) + + def get_transform(self, image): + assert image.shape[-1] == 3, "RandomLighting only works on RGB images" + weights = np.random.normal(scale=self.scale, size=3) + return BlendTransform( + src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0 + ) + + +def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True): + """ + Generate a CropTransform so that the cropping region contains + the center of the given instance. + Args: + crop_size (tuple): h, w in pixels + image_size (tuple): h, w + instance (dict): an annotation dict of one instance, in Detectron2's + dataset format. + """ + bbox = random.choice(instances) + bbox[::2] = np.clip(bbox[::2],0,image_size[1]) + bbox[1::2] = np.clip(bbox[1::2],0,image_size[0]) + crop_size = np.asarray(crop_size, dtype=np.int32) + center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 + assert ( + image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] + ), "The annotation bounding box is outside of the image!" + assert ( + image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] + ), "Crop size is larger than image size!" 
+ min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) + max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) + max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) + y0 = np.random.randint(min_yx[0], max_yx[0] + 1) + x0 = np.random.randint(min_yx[1], max_yx[1] + 1) + if not crop_box: + num_modifications = 0 + modified = True + crop_size = crop_size.astype(np.float32) + while modified: + modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances) + num_modifications += 1 + if num_modifications > 100: + raise ValueError( + "Cannot finished cropping adjustment within 100 tries (#instances {}).".format( + len(instances) + ) + ) + return CropTransform(0, 0, image_size[1], image_size[0]) + if (x0 < 0) or (y0 < 0): + x0 = np.maximum(x0,0) + y0 = np.maximum(y0,0) + return CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0]))) + +def adjust_crop(x0, y0, crop_size, instances, eps=1e-3): + modified = False + x1 = x0 + crop_size[1] + y1 = y0 + crop_size[0] + for bbox in instances: + + if bbox[0] < x0 - eps and bbox[2] > x0 + eps: + crop_size[1] += x0 - bbox[0] + x0 = bbox[0] + modified = True + + if bbox[0] < x1 - eps and bbox[2] > x1 + eps: + crop_size[1] += bbox[2] - x1 + x1 = bbox[2] + modified = True + + if bbox[1] < y0 - eps and bbox[3] > y0 + eps: + crop_size[0] += y0 - bbox[1] + y0 = bbox[1] + modified = True + + if bbox[1] < y1 - eps and bbox[3] > y1 + eps: + crop_size[0] += bbox[3] - y1 + y1 = bbox[3] + modified = True + + return modified, x0, y0, crop_size + + +class RandomCropWithInstance(RandomCrop): + def __init__(self, crop_type, crop_size, crop_instance=False): + """ + Args: + crop_instance (bool): if False, extend cropping boxes to avoid cropping instances + """ + super().__init__(crop_type, crop_size) + self.crop_instance = crop_instance + self.input_args = ("image", "boxes") + def get_transform(self, img, boxes): + image_size = img.shape[:2] + crop_size = self.get_crop_size(image_size) + return gen_crop_transform_with_instance( + crop_size, image_size, boxes, crop_box=self.crop_instance + ) diff --git a/src/sts/detectron2/data/transforms/transform.py b/src/sts/detectron2/data/transforms/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..de44b991d7ab0d920ffb769e1402f08e358d37f7 --- /dev/null +++ b/src/sts/detectron2/data/transforms/transform.py @@ -0,0 +1,351 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +See "Data Augmentation" tutorial for an overview of the system: +https://detectron2.readthedocs.io/tutorials/augmentation.html +""" + +import numpy as np +import torch +import torch.nn.functional as F +from fvcore.transforms.transform import ( + CropTransform, + HFlipTransform, + NoOpTransform, + Transform, + TransformList, +) +from PIL import Image + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + +__all__ = [ + "ExtentTransform", + "ResizeTransform", + "RotationTransform", + "ColorTransform", + "PILColorTransform", +] + + +class ExtentTransform(Transform): + """ + Extracts a subregion from the source image and scales it to the output size. + + The fill color is used to map pixels from the source rect that fall outside + the source image. 
+ + See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform + """ + + def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): + """ + Args: + src_rect (x0, y0, x1, y1): src coordinates + output_size (h, w): dst image size + interp: PIL interpolation methods + fill: Fill color used when src_rect extends outside image + """ + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + h, w = self.output_size + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.transform( + size=(w, h), + method=Image.EXTENT, + data=self.src_rect, + resample=interp if interp else self.interp, + fill=self.fill, + ) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + return ret + + def apply_coords(self, coords): + # Transform image center from source coordinates into output coordinates + # and then map the new origin to the corner of the output image. + h, w = self.output_size + x0, y0, x1, y1 = self.src_rect + new_coords = coords.astype(np.float32) + new_coords[:, 0] -= 0.5 * (x0 + x1) + new_coords[:, 1] -= 0.5 * (y0 + y1) + new_coords[:, 0] *= w / (x1 - x0) + new_coords[:, 1] *= h / (y1 - y0) + new_coords[:, 0] += 0.5 * w + new_coords[:, 1] += 0.5 * h + return new_coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. 
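+
+        Example (an illustrative sketch; ``img`` and ``boxes`` are assumed inputs)::
+
+            tfm = ResizeTransform(h=480, w=640, new_h=600, new_w=800)
+            new_img = tfm.apply_image(img)      # img must have shape (480, 640[, C])
+            new_boxes = tfm.apply_box(boxes)    # boxes are Nx4 in XYXY absolute coordinates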
+ """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + interp_method = interp if interp is not None else self.interp + + if img.dtype == np.uint8: + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + else: + # PIL only supports uint8 + if any(x < 0 for x in img.strides): + img = np.ascontiguousarray(img) + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = { + Image.NEAREST: "nearest", + Image.BILINEAR: "bilinear", + Image.BICUBIC: "bicubic", + } + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] + align_corners = None if mode == "nearest" else False + img = F.interpolate( + img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners + ) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + + +class RotationTransform(Transform): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around its center. 
+ """ + + def __init__(self, h, w, angle, expand=True, center=None, interp=None): + """ + Args: + h, w (int): original image size + angle (float): degrees for rotation + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (tuple (width, height)): coordinates of the rotation center + if left to None, the center will be fit to the center of each image + center has no effect if expand=True because it only affects shifting + interp: cv2 interpolation method, default cv2.INTER_LINEAR + """ + super().__init__() + image_center = np.array((w / 2, h / 2)) + if center is None: + center = image_center + if interp is None: + interp = cv2.INTER_LINEAR + abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle)))) + if expand: + # find the new width and height bounds + bound_w, bound_h = np.rint( + [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] + ).astype(int) + else: + bound_w, bound_h = w, h + + self._set_attributes(locals()) + self.rm_coords = self.create_rotation_matrix() + # Needed because of this problem https://github.com/opencv/opencv/issues/11784 + self.rm_image = self.create_rotation_matrix(offset=-0.5) + + def apply_image(self, img, interp=None): + """ + img should be a numpy array, formatted as Height * Width * Nchannels + """ + if len(img) == 0 or self.angle % 360 == 0: + return img + assert img.shape[:2] == (self.h, self.w) + interp = interp if interp is not None else self.interp + return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) + + def apply_coords(self, coords): + """ + coords should be a N * 2 array-like, containing N couples of (x, y) points + """ + coords = np.asarray(coords, dtype=float) + if len(coords) == 0 or self.angle % 360 == 0: + return coords + return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) + return segmentation + + def create_rotation_matrix(self, offset=0): + center = (self.center[0] + offset, self.center[1] + offset) + rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) + if self.expand: + # Find the coordinates of the center of rotation in the new image + # The only point for which we know the future coordinates is the center of the image + rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] + new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center + # shift the rotation center to the new coordinates + rm[:, 2] += new_center + return rm + + def inverse(self): + """ + The inverse is to rotate it back with expand, and crop to get the original shape. + """ + if not self.expand: # Not possible to inverse if a part of the image is lost + raise NotImplementedError() + rotation = RotationTransform( + self.bound_h, self.bound_w, -self.angle, True, None, self.interp + ) + crop = CropTransform( + (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h + ) + return TransformList([rotation, crop]) + + +class ColorTransform(Transform): + """ + Generic wrapper for any photometric transforms. + These transformations should only affect the color space and + not the coordinate space of the image (e.g. 
annotation + coordinates such as bounding boxes should not be changed) + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in an ndarray and returns an ndarray. + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img): + return self.op(img) + + def apply_coords(self, coords): + return coords + + def inverse(self): + return NoOpTransform() + + def apply_segmentation(self, segmentation): + return segmentation + + +class PILColorTransform(ColorTransform): + """ + Generic wrapper for PIL Photometric image transforms, + which affect the color space and not the coordinate + space of the image + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in a PIL Image and returns a transformed + PIL Image. + For reference on possible operations see: + - https://pillow.readthedocs.io/en/stable/ + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__(op) + + def apply_image(self, img): + img = Image.fromarray(img) + return np.asarray(super().apply_image(img)) + + +def HFlip_rotated_box(transform, rotated_boxes): + """ + Apply the horizontal flip transform on rotated boxes. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + # Transform x_center + rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] + # Transform angle + rotated_boxes[:, 4] = -rotated_boxes[:, 4] + return rotated_boxes + + +def Resize_rotated_box(transform, rotated_boxes): + """ + Apply the resizing transform on rotated boxes. For details of how these (approximation) + formulas are derived, please refer to :meth:`RotatedBoxes.scale`. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + scale_factor_x = transform.new_w * 1.0 / transform.w + scale_factor_y = transform.new_h * 1.0 / transform.h + rotated_boxes[:, 0] *= scale_factor_x + rotated_boxes[:, 1] *= scale_factor_y + theta = rotated_boxes[:, 4] * np.pi / 180.0 + c = np.cos(theta) + s = np.sin(theta) + rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) + rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) + rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi + + return rotated_boxes + + +HFlipTransform.register_type("rotated_box", HFlip_rotated_box) +ResizeTransform.register_type("rotated_box", Resize_rotated_box) + +# not necessary any more with latest fvcore +NoOpTransform.register_type("rotated_box", lambda t, x: x) diff --git a/src/sts/detectron2/engine/__init__.py b/src/sts/detectron2/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08a61572b4c7d09c8d400e903a96cbf5b2cc4763 --- /dev/null +++ b/src/sts/detectron2/engine/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+ +from .launch import * +from .train_loop import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) +# but still make them available here +from .hooks import * +from .defaults import * diff --git a/src/sts/detectron2/engine/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/engine/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75e204f750188a1d774bf3ee41fc570df5d7b400 Binary files /dev/null and b/src/sts/detectron2/engine/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/engine/__pycache__/defaults.cpython-38.pyc b/src/sts/detectron2/engine/__pycache__/defaults.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f96f7d5c18ac478ba4d741567a53cd18ede567f Binary files /dev/null and b/src/sts/detectron2/engine/__pycache__/defaults.cpython-38.pyc differ diff --git a/src/sts/detectron2/engine/__pycache__/hooks.cpython-38.pyc b/src/sts/detectron2/engine/__pycache__/hooks.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71a2f0e3eba2ad6e0dd8406e9b54cbc813d1d288 Binary files /dev/null and b/src/sts/detectron2/engine/__pycache__/hooks.cpython-38.pyc differ diff --git a/src/sts/detectron2/engine/__pycache__/launch.cpython-38.pyc b/src/sts/detectron2/engine/__pycache__/launch.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93ac358872d0c005f32cbb5c23a063e56de990c5 Binary files /dev/null and b/src/sts/detectron2/engine/__pycache__/launch.cpython-38.pyc differ diff --git a/src/sts/detectron2/engine/__pycache__/train_loop.cpython-38.pyc b/src/sts/detectron2/engine/__pycache__/train_loop.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ecacdc8c0f96aef952a8ca26004c92385b10ea7 Binary files /dev/null and b/src/sts/detectron2/engine/__pycache__/train_loop.cpython-38.pyc differ diff --git a/src/sts/detectron2/engine/defaults.py b/src/sts/detectron2/engine/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..57fd52a75c88a3193ae34aa8d420838d88197135 --- /dev/null +++ b/src/sts/detectron2/engine/defaults.py @@ -0,0 +1,650 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +This file contains components with some default boilerplate logic user may need +in training / testing. They will not work for everyone, but many users may find them useful. + +The behavior of functions/classes in this file is subject to change, +since they are meant to represent the "common default behavior" people need in their projects. 
+""" + +import argparse +import logging +import os +import sys +from collections import OrderedDict +from typing import Optional +import torch +from fvcore.nn.precise_bn import get_bn_modules +from torch.nn.parallel import DistributedDataParallel + +import detectron2.data.transforms as T +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.data import ( + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.evaluation import ( + DatasetEvaluator, + inference_on_dataset, + print_csv_format, + verify_results, +) +from detectron2.modeling import build_model +from detectron2.solver import build_lr_scheduler, build_optimizer +from detectron2.utils import comm +from detectron2.utils.collect_env import collect_env_info +from detectron2.utils.env import TORCH_VERSION, seed_all_rng +from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import setup_logger + +from . import hooks +from .train_loop import AMPTrainer, SimpleTrainer, TrainerBase + +__all__ = [ + "default_argument_parser", + "default_setup", + "default_writers", + "DefaultPredictor", + "DefaultTrainer", +] + + +def default_argument_parser(epilog=None): + """ + Create a parser with some common arguments used by detectron2 users. + + Args: + epilog (str): epilog passed to ArgumentParser describing the usage. + + Returns: + argparse.ArgumentParser: + """ + parser = argparse.ArgumentParser( + epilog=epilog + or f""" +Examples: + +Run on single machine: + $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml + +Change some config options: + $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 + +Run on multiple machines: + (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] + (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") + parser.add_argument( + "--resume", + action="store_true", + help="Whether to attempt to resume from the checkpoint directory. " + "See documentation of `DefaultTrainer.resume_or_load()` for what it means.", + ) + parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") + parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") + parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") + parser.add_argument( + "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" + ) + + # PyTorch still may leave orphan processes in multi-gpu training. + # Therefore we use a deterministic way to obtain port, + # so that users are aware of orphan processes by seeing the port occupied. + port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 + parser.add_argument( + "--dist-url", + default="tcp://127.0.0.1:{}".format(port), + help="initialization URL for pytorch distributed backend. See " + "https://pytorch.org/docs/stable/distributed.html for details.", + ) + parser.add_argument( + "opts", + help="Modify config options by adding 'KEY VALUE' pairs at the end of the command. 
" + "See config references at " + "https://detectron2.readthedocs.io/modules/config.html#config-references", + default=None, + nargs=argparse.REMAINDER, + ) + return parser + + +def default_setup(cfg, args): + """ + Perform some basic common setups at the beginning of a job, including: + + 1. Set up the detectron2 logger + 2. Log basic information about environment, cmdline arguments, and config + 3. Backup the config to the output directory + + Args: + cfg (CfgNode): the full config to be used + args (argparse.NameSpace): the command line arguments to be logged + """ + output_dir = cfg.OUTPUT_DIR + if comm.is_main_process() and output_dir: + PathManager.mkdirs(output_dir) + + rank = comm.get_rank() + setup_logger(output_dir, distributed_rank=rank, name="fvcore") + logger = setup_logger(output_dir, distributed_rank=rank) + + logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())) + logger.info("Environment info:\n" + collect_env_info()) + + logger.info("Command line arguments: " + str(args)) + if hasattr(args, "config_file") and args.config_file != "": + logger.info( + "Contents of args.config_file={}:\n{}".format( + args.config_file, PathManager.open(args.config_file, "r").read() + ) + ) + + logger.info("Running with full config:\n{}".format(cfg)) + if comm.is_main_process() and output_dir: + # Note: some of our scripts may expect the existence of + # config.yaml in output directory + path = os.path.join(output_dir, "config.yaml") + with PathManager.open(path, "w") as f: + f.write(cfg.dump()) + logger.info("Full config saved to {}".format(path)) + + # make sure each worker has a different, yet deterministic seed if specified + seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank) + + # cudnn benchmark has large overhead. It shouldn't be used considering the small size of + # typical validation set. + if not (hasattr(args, "eval_only") and args.eval_only): + torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK + + +def default_writers(output_dir: str, max_iter: Optional[int] = None): + """ + Build a list of :class:`EventWriter` to be used. + It now consists of a :class:`CommonMetricPrinter`, + :class:`TensorboardXWriter` and :class:`JSONWriter`. + + Args: + output_dir: directory to store JSON metrics and tensorboard events + max_iter: the total number of iterations + + Returns: + list[EventWriter]: a list of :class:`EventWriter` objects. + """ + return [ + # It may not always print what you want to see, since it prints "common" metrics only. + CommonMetricPrinter(max_iter), + JSONWriter(os.path.join(output_dir, "metrics.json")), + TensorboardXWriter(output_dir), + ] + + +class DefaultPredictor: + """ + Create a simple end-to-end predictor with the given config that runs on + single device for a single input image. + + Compared to using the model directly, this class does the following additions: + + 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. + 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. + 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. + 4. Take one input image and produce a single output, instead of a batch. + + If you'd like to do anything more fancy, please refer to its source code + as examples to build and use the model manually. + + Attributes: + metadata (Metadata): the metadata of the underlying dataset, obtained from + cfg.DATASETS.TEST. 
+ + Examples: + :: + pred = DefaultPredictor(cfg) + inputs = cv2.imread("input.jpg") + outputs = pred(inputs) + """ + + def __init__(self, cfg): + self.cfg = cfg.clone() # cfg can be modified by model + self.model = build_model(self.cfg) + self.model.eval() + if len(cfg.DATASETS.TEST): + self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + + checkpointer = DetectionCheckpointer(self.model) + checkpointer.load(cfg.MODEL.WEIGHTS) + + self.aug = T.ResizeShortestEdge( + [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST + ) + + self.input_format = cfg.INPUT.FORMAT + assert self.input_format in ["RGB", "BGR"], self.input_format + + def __call__(self, original_image): + """ + Args: + original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). + + Returns: + predictions (dict): + the output of the model for one image only. + See :doc:`/tutorials/models` for details about the format. + """ + with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 + # Apply pre-processing to image. + if self.input_format == "RGB": + # whether the model expects BGR inputs or RGB + original_image = original_image[:, :, ::-1] + height, width = original_image.shape[:2] + image = self.aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + inputs = {"image": image, "height": height, "width": width} + predictions = self.model([inputs])[0] + return predictions + + +class DefaultTrainer(TrainerBase): + """ + A trainer with default training logic. It does the following: + + 1. Create a :class:`SimpleTrainer` using model, optimizer, dataloader + defined by the given config. Create a LR scheduler defined by the config. + 2. Load the last checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when + `resume_or_load` is called. + 3. Register a few common hooks defined by the config. + + It is created to simplify the **standard model training workflow** and reduce code boilerplate + for users who only need the standard training workflow, with standard features. + It means this class makes *many assumptions* about your training logic that + may easily become invalid in a new research. In fact, any assumptions beyond those made in the + :class:`SimpleTrainer` are too much for research. + + The code of this class has been annotated about restrictive assumptions it makes. + When they do not work for you, you're encouraged to: + + 1. Overwrite methods of this class, OR: + 2. Use :class:`SimpleTrainer`, which only does minimal SGD training and + nothing else. You can then add your own hooks if needed. OR: + 3. Write your own training loop similar to `tools/plain_train_net.py`. + + See the :doc:`/tutorials/training` tutorials for more details. + + Note that the behavior of this class, like other functions/classes in + this file, is not stable, since it is meant to represent the "common default behavior". + It is only guaranteed to work well with the standard models and training workflow in detectron2. + To obtain more stable behavior, write your own training logic with other public APIs. 
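Putting the pieces of this module together, a training entry point typically looks roughly like the following. This is a sketch in the style of detectron2's `tools/train_net.py`, not this repository's actual script; the argument handling mirrors `default_argument_parser`, `default_setup`, and `launch` defined in this package:

```python
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch


def setup(args):
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)  # logging, config backup, seeding
    return cfg


def main(args):
    cfg = setup(args)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
```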
+ + Examples: + :: + trainer = DefaultTrainer(cfg) + trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS + trainer.train() + + Attributes: + scheduler: + checkpointer (DetectionCheckpointer): + cfg (CfgNode): + """ + + def __init__(self, cfg): + """ + Args: + cfg (CfgNode): + """ + super().__init__() + logger = logging.getLogger("detectron2") + if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 + setup_logger() + cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) + + # Assume these objects must be constructed in this order. + model = self.build_model(cfg) + optimizer = self.build_optimizer(cfg, model) + data_loader = self.build_train_loader(cfg) + + # For training, wrap with DDP. But don't need this for inference. + if comm.get_world_size() > 1: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False,find_unused_parameters=True + ) + self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)( + model, data_loader, optimizer + ) + + self.scheduler = self.build_lr_scheduler(cfg, optimizer) + # Assume no other objects need to be checkpointed. + # We can later make it checkpoint the stateful hooks + self.checkpointer = DetectionCheckpointer( + # Assume you want to save checkpoints together with logs/statistics + model, + cfg.OUTPUT_DIR, + optimizer=optimizer, + scheduler=self.scheduler, + ) + self.start_iter = 0 + self.max_iter = cfg.SOLVER.MAX_ITER + self.cfg = cfg + + self.register_hooks(self.build_hooks()) + + def resume_or_load(self, resume=True): + """ + If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by + a `last_checkpoint` file), resume from the file. Resuming means loading all + available states (eg. optimizer and scheduler) and update iteration counter + from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used. + + Otherwise, this is considered as an independent training. The method will load model + weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start + from iteration 0. + + Args: + resume (bool): whether to do resume or not + """ + checkpoint = self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume) + if resume and self.checkpointer.has_checkpoint(): + self.start_iter = checkpoint.get("iteration", -1) + 1 + # The checkpoint stores the training iteration that just finished, thus we start + # at the next iteration (or iter zero if there's no checkpoint). + if isinstance(self.model, DistributedDataParallel): + # broadcast loaded data/model from the first rank, because other + # machines may not have access to the checkpoint file + if TORCH_VERSION >= (1, 7): + self.model._sync_params_and_buffers() + self.start_iter = comm.all_gather(self.start_iter)[0] + + def build_hooks(self): + """ + Build a list of default hooks, including timing, evaluation, + checkpointing, lr scheduling, precise BN, writing events. + + Returns: + list[HookBase]: + """ + cfg = self.cfg.clone() + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN + + ret = [ + hooks.IterationTimer(), + hooks.LRScheduler(), + hooks.PreciseBN( + # Run at the same freq as (but before) evaluation. 
+ cfg.TEST.EVAL_PERIOD, + self.model, + # Build a new data loader to not affect training + self.build_train_loader(cfg), + cfg.TEST.PRECISE_BN.NUM_ITER, + ) + if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) + else None, + ] + + # Do PreciseBN before checkpointer, because it updates the model and need to + # be saved by checkpointer. + # This is not always the best: if checkpointing has a different frequency, + # some checkpoints may have more precise statistics than others. + if comm.is_main_process(): + ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD)) + + def test_and_save_results(): + self._last_eval_results = self.test(self.cfg, self.model) + return self._last_eval_results + + # Do evaluation after checkpointer, because then if it fails, + # we can use the saved checkpoint to debug. + ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) + + if comm.is_main_process(): + # Here the default print/log frequency of each writer is used. + # run writers in the end, so that evaluation metrics are written + ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) + return ret + + def build_writers(self): + """ + Build a list of writers to be used using :func:`default_writers()`. + If you'd like a different list of writers, you can overwrite it in + your trainer. + + Returns: + list[EventWriter]: a list of :class:`EventWriter` objects. + """ + return default_writers(self.cfg.OUTPUT_DIR, self.max_iter) + + def train(self): + """ + Run training. + + Returns: + OrderedDict of results, if evaluation is enabled. Otherwise None. + """ + super().train(self.start_iter, self.max_iter) + if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process(): + assert hasattr( + self, "_last_eval_results" + ), "No evaluation results obtained during training!" + verify_results(self.cfg, self._last_eval_results) + return self._last_eval_results + + def run_step(self): + self._trainer.iter = self.iter + self._trainer.run_step() + + @classmethod + def build_model(cls, cfg): + """ + Returns: + torch.nn.Module: + + It now calls :func:`detectron2.modeling.build_model`. + Overwrite it if you'd like a different model. + """ + model = build_model(cfg) + logger = logging.getLogger(__name__) + logger.info("Model:\n{}".format(model)) + return model + + @classmethod + def build_optimizer(cls, cfg, model): + """ + Returns: + torch.optim.Optimizer: + + It now calls :func:`detectron2.solver.build_optimizer`. + Overwrite it if you'd like a different optimizer. + """ + return build_optimizer(cfg, model) + + @classmethod + def build_lr_scheduler(cls, cfg, optimizer): + """ + It now calls :func:`detectron2.solver.build_lr_scheduler`. + Overwrite it if you'd like a different scheduler. + """ + return build_lr_scheduler(cfg, optimizer) + + @classmethod + def build_train_loader(cls, cfg): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_train_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_train_loader(cfg) + + @classmethod + def build_test_loader(cls, cfg, dataset_name): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_test_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_test_loader(cfg, dataset_name) + + @classmethod + def build_evaluator(cls, cfg, dataset_name): + """ + Returns: + DatasetEvaluator or None + + It is not implemented by default. 
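A minimal sketch of the subclassing that the error message just below asks for, assuming a COCO-format dataset has already been registered under the name listed in `cfg.DATASETS.TEST`:

```python
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator


class TrainerWithEval(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name):
        # write COCO-style metrics to the run's output directory
        return COCOEvaluator(dataset_name, output_dir=cfg.OUTPUT_DIR)
```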
+ """ + raise NotImplementedError( + """ +If you want DefaultTrainer to automatically run evaluation, +please implement `build_evaluator()` in subclasses (see train_net.py for example). +Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example). +""" + ) + + @classmethod + def test(cls, cfg, model, evaluators=None): + """ + Args: + cfg (CfgNode): + model (nn.Module): + evaluators (list[DatasetEvaluator] or None): if None, will call + :meth:`build_evaluator`. Otherwise, must have the same length as + ``cfg.DATASETS.TEST``. + + Returns: + dict: a dict of result metrics + """ + logger = logging.getLogger(__name__) + if isinstance(evaluators, DatasetEvaluator): + evaluators = [evaluators] + if evaluators is not None: + assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( + len(cfg.DATASETS.TEST), len(evaluators) + ) + + results = OrderedDict() + for idx, dataset_name in enumerate(cfg.DATASETS.TEST): + data_loader = cls.build_test_loader(cfg, dataset_name) + # When evaluators are passed in as arguments, + # implicitly assume that evaluators can be created before data_loader. + if evaluators is not None: + evaluator = evaluators[idx] + else: + try: + evaluator = cls.build_evaluator(cfg, dataset_name) + except NotImplementedError: + logger.warn( + "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " + "or implement its `build_evaluator` method." + ) + results[dataset_name] = {} + continue + results_i = inference_on_dataset(model, data_loader, evaluator) + results[dataset_name] = results_i + if comm.is_main_process(): + assert isinstance( + results_i, dict + ), "Evaluator must return a dict on the main process. Got {} instead.".format( + results_i + ) + logger.info("Evaluation results for {} in csv format:".format(dataset_name)) + print_csv_format(results_i) + + if len(results) == 1: + results = list(results.values())[0] + return results + + @staticmethod + def auto_scale_workers(cfg, num_workers: int): + """ + When the config is defined for certain number of workers (according to + ``cfg.SOLVER.REFERENCE_WORLD_SIZE``) that's different from the number of + workers currently in use, returns a new cfg where the total batch size + is scaled so that the per-GPU batch size stays the same as the + original ``IMS_PER_BATCH // REFERENCE_WORLD_SIZE``. + + Other config options are also scaled accordingly: + * training steps and warmup steps are scaled inverse proportionally. + * learning rate are scaled proportionally, following :paper:`ImageNet in 1h`. + + For example, with the original config like the following: + + .. code-block:: yaml + + IMS_PER_BATCH: 16 + BASE_LR: 0.1 + REFERENCE_WORLD_SIZE: 8 + MAX_ITER: 5000 + STEPS: (4000,) + CHECKPOINT_PERIOD: 1000 + + When this config is used on 16 GPUs instead of the reference number 8, + calling this method will return a new config with: + + .. code-block:: yaml + + IMS_PER_BATCH: 32 + BASE_LR: 0.2 + REFERENCE_WORLD_SIZE: 16 + MAX_ITER: 2500 + STEPS: (2000,) + CHECKPOINT_PERIOD: 500 + + Note that both the original config and this new config can be trained on 16 GPUs. + It's up to user whether to enable this feature (by setting ``REFERENCE_WORLD_SIZE``). + + Returns: + CfgNode: a new config. Same as original if ``cfg.SOLVER.REFERENCE_WORLD_SIZE==0``. 
+ """ + old_world_size = cfg.SOLVER.REFERENCE_WORLD_SIZE + if old_world_size == 0 or old_world_size == num_workers: + return cfg + cfg = cfg.clone() + frozen = cfg.is_frozen() + cfg.defrost() + + assert ( + cfg.SOLVER.IMS_PER_BATCH % old_world_size == 0 + ), "Invalid REFERENCE_WORLD_SIZE in config!" + scale = num_workers / old_world_size + bs = cfg.SOLVER.IMS_PER_BATCH = int(round(cfg.SOLVER.IMS_PER_BATCH * scale)) + lr = cfg.SOLVER.BASE_LR = cfg.SOLVER.BASE_LR * scale + max_iter = cfg.SOLVER.MAX_ITER = int(round(cfg.SOLVER.MAX_ITER / scale)) + warmup_iter = cfg.SOLVER.WARMUP_ITERS = int(round(cfg.SOLVER.WARMUP_ITERS / scale)) + cfg.SOLVER.STEPS = tuple(int(round(s / scale)) for s in cfg.SOLVER.STEPS) + cfg.TEST.EVAL_PERIOD = int(round(cfg.TEST.EVAL_PERIOD / scale)) + cfg.SOLVER.CHECKPOINT_PERIOD = int(round(cfg.SOLVER.CHECKPOINT_PERIOD / scale)) + cfg.SOLVER.REFERENCE_WORLD_SIZE = num_workers # maintain invariant + logger = logging.getLogger(__name__) + logger.info( + f"Auto-scaling the config to batch_size={bs}, learning_rate={lr}, " + f"max_iter={max_iter}, warmup={warmup_iter}." + ) + + if frozen: + cfg.freeze() + return cfg + + +# Access basic attributes from the underlying trainer +for _attr in ["model", "data_loader", "optimizer"]: + setattr( + DefaultTrainer, + _attr, + property( + # getter + lambda self, x=_attr: getattr(self._trainer, x), + # setter + lambda self, value, x=_attr: setattr(self._trainer, x, value), + ), + ) diff --git a/src/sts/detectron2/engine/hooks.py b/src/sts/detectron2/engine/hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..56551af7c7e8a07de67c455abffd007574bae5fd --- /dev/null +++ b/src/sts/detectron2/engine/hooks.py @@ -0,0 +1,450 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import datetime +import itertools +import logging +import os +import tempfile +import time +from collections import Counter +import torch +from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer +from fvcore.common.param_scheduler import ParamScheduler +from fvcore.common.timer import Timer +from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats + +import detectron2.utils.comm as comm +from detectron2.evaluation.testing import flatten_results_dict +from detectron2.solver import LRMultiplier +from detectron2.utils.events import EventStorage, EventWriter +from detectron2.utils.file_io import PathManager + +from .train_loop import HookBase + +__all__ = [ + "CallbackHook", + "IterationTimer", + "PeriodicWriter", + "PeriodicCheckpointer", + "LRScheduler", + "AutogradProfiler", + "EvalHook", + "PreciseBN", +] + + +""" +Implement some common hooks. +""" + + +class CallbackHook(HookBase): + """ + Create a hook using callback functions provided by the user. + """ + + def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None): + """ + Each argument is a function that takes one argument: the trainer. + """ + self._before_train = before_train + self._before_step = before_step + self._after_step = after_step + self._after_train = after_train + + def before_train(self): + if self._before_train: + self._before_train(self.trainer) + + def after_train(self): + if self._after_train: + self._after_train(self.trainer) + # The functions may be closures that hold reference to the trainer + # Therefore, delete them to avoid circular reference. 
+ del self._before_train, self._after_train + del self._before_step, self._after_step + + def before_step(self): + if self._before_step: + self._before_step(self.trainer) + + def after_step(self): + if self._after_step: + self._after_step(self.trainer) + + +class IterationTimer(HookBase): + """ + Track the time spent for each iteration (each run_step call in the trainer). + Print a summary in the end of training. + + This hook uses the time between the call to its :meth:`before_step` + and :meth:`after_step` methods. + Under the convention that :meth:`before_step` of all hooks should only + take negligible amount of time, the :class:`IterationTimer` hook should be + placed at the beginning of the list of hooks to obtain accurate timing. + """ + + def __init__(self, warmup_iter=3): + """ + Args: + warmup_iter (int): the number of iterations at the beginning to exclude + from timing. + """ + self._warmup_iter = warmup_iter + self._step_timer = Timer() + self._start_time = time.perf_counter() + self._total_timer = Timer() + + def before_train(self): + self._start_time = time.perf_counter() + self._total_timer.reset() + self._total_timer.pause() + + def after_train(self): + logger = logging.getLogger(__name__) + total_time = time.perf_counter() - self._start_time + total_time_minus_hooks = self._total_timer.seconds() + hook_time = total_time - total_time_minus_hooks + + num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter + + if num_iter > 0 and total_time_minus_hooks > 0: + # Speed is meaningful only after warmup + # NOTE this format is parsed by grep in some scripts + logger.info( + "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( + num_iter, + str(datetime.timedelta(seconds=int(total_time_minus_hooks))), + total_time_minus_hooks / num_iter, + ) + ) + + logger.info( + "Total training time: {} ({} on hooks)".format( + str(datetime.timedelta(seconds=int(total_time))), + str(datetime.timedelta(seconds=int(hook_time))), + ) + ) + + def before_step(self): + self._step_timer.reset() + self._total_timer.resume() + + def after_step(self): + # +1 because we're in after_step, the current step is done + # but not yet counted + iter_done = self.trainer.iter - self.trainer.start_iter + 1 + if iter_done >= self._warmup_iter: + sec = self._step_timer.seconds() + self.trainer.storage.put_scalars(time=sec) + else: + self._start_time = time.perf_counter() + self._total_timer.reset() + + self._total_timer.pause() + + +class PeriodicWriter(HookBase): + """ + Write events to EventStorage (by calling ``writer.write()``) periodically. + + It is executed every ``period`` iterations and after the last iteration. + Note that ``period`` does not affect how data is smoothed by each writer. + """ + + def __init__(self, writers, period=20): + """ + Args: + writers (list[EventWriter]): a list of EventWriter objects + period (int): + """ + self._writers = writers + for w in writers: + assert isinstance(w, EventWriter), w + self._period = period + + def after_step(self): + if (self.trainer.iter + 1) % self._period == 0 or ( + self.trainer.iter == self.trainer.max_iter - 1 + ): + for writer in self._writers: + writer.write() + + def after_train(self): + for writer in self._writers: + # If any new data is found (e.g. produced by other after_train), + # write them before closing + writer.write() + writer.close() + + +class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase): + """ + Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook. 
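To illustrate how hooks such as `CallbackHook` above plug into a trainer, a small sketch; the logging callback and the already-built config `cfg` are assumptions for the example:

```python
from detectron2.engine import DefaultTrainer, hooks


def log_iteration(trainer):
    # runs after every step; trainer.iter and trainer.storage are available here
    if trainer.iter % 100 == 0:
        print(f"finished iteration {trainer.iter}")


trainer = DefaultTrainer(cfg)  # cfg: an already-built CfgNode (assumed)
trainer.register_hooks([hooks.CallbackHook(after_step=log_iteration)])
trainer.train()
```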
+ + Note that when used as a hook, + it is unable to save additional data other than what's defined + by the given `checkpointer`. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def before_train(self): + self.max_iter = self.trainer.max_iter + + def after_step(self): + # No way to use **kwargs + self.step(self.trainer.iter) + + +class LRScheduler(HookBase): + """ + A hook which executes a torch builtin LR scheduler and summarizes the LR. + It is executed after every iteration. + """ + + def __init__(self, optimizer=None, scheduler=None): + """ + Args: + optimizer (torch.optim.Optimizer): + scheduler (torch.optim.LRScheduler or fvcore.common.param_scheduler.ParamScheduler): + if a :class:`ParamScheduler` object, it defines the multiplier over the base LR + in the optimizer. + + If any argument is not given, will try to obtain it from the trainer. + """ + self._optimizer = optimizer + self._scheduler = scheduler + + def before_train(self): + self._optimizer = self._optimizer or self.trainer.optimizer + self._scheduler = self._scheduler or self.trainer.scheduler + if isinstance(self._scheduler, ParamScheduler): + self._scheduler = LRMultiplier( + self._optimizer, + self._scheduler, + self.trainer.max_iter, + last_iter=self.trainer.iter - 1, + ) + + # NOTE: some heuristics on what LR to summarize + # summarize the param group with most parameters + largest_group = max(len(g["params"]) for g in self._optimizer.param_groups) + + if largest_group == 1: + # If all groups have one parameter, + # then find the most common initial LR, and use it for summary + lr_count = Counter([g["lr"] for g in self._optimizer.param_groups]) + lr = lr_count.most_common()[0][0] + for i, g in enumerate(self._optimizer.param_groups): + if g["lr"] == lr: + self._best_param_group_id = i + break + else: + for i, g in enumerate(self._optimizer.param_groups): + if len(g["params"]) == largest_group: + self._best_param_group_id = i + break + + def after_step(self): + lr = self._optimizer.param_groups[self._best_param_group_id]["lr"] + self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False) + self._scheduler.step() + + +class AutogradProfiler(HookBase): + """ + A hook which runs `torch.autograd.profiler.profile`. + + Examples: + :: + hooks.AutogradProfiler( + lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR + ) + + The above example will run the profiler for iteration 10~20 and dump + results to ``OUTPUT_DIR``. We did not profile the first few iterations + because they are typically slower than the rest. + The result files can be loaded in the ``chrome://tracing`` page in chrome browser. + + Note: + When used together with NCCL on older version of GPUs, + autograd profiler may cause deadlock because it unnecessarily allocates + memory on every device it sees. The memory management calls, if + interleaved with NCCL calls, lead to deadlock on GPUs that do not + support ``cudaLaunchCooperativeKernelMultiDevice``. + """ + + def __init__(self, enable_predicate, output_dir, *, use_cuda=True): + """ + Args: + enable_predicate (callable[trainer -> bool]): a function which takes a trainer, + and returns whether to enable the profiler. + It will be called once every step, and can be used to select which steps to profile. + output_dir (str): the output directory to dump tracing files. + use_cuda (bool): same as in `torch.autograd.profiler.profile`. 
+ """ + self._enable_predicate = enable_predicate + self._use_cuda = use_cuda + self._output_dir = output_dir + + def before_step(self): + if self._enable_predicate(self.trainer): + self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda) + self._profiler.__enter__() + else: + self._profiler = None + + def after_step(self): + if self._profiler is None: + return + self._profiler.__exit__(None, None, None) + PathManager.mkdirs(self._output_dir) + out_file = os.path.join( + self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter) + ) + if "://" not in out_file: + self._profiler.export_chrome_trace(out_file) + else: + # Support non-posix filesystems + with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d: + tmp_file = os.path.join(d, "tmp.json") + self._profiler.export_chrome_trace(tmp_file) + with open(tmp_file) as f: + content = f.read() + with PathManager.open(out_file, "w") as f: + f.write(content) + + +class EvalHook(HookBase): + """ + Run an evaluation function periodically, and at the end of training. + + It is executed every ``eval_period`` iterations and after the last iteration. + """ + + def __init__(self, eval_period, eval_function): + """ + Args: + eval_period (int): the period to run `eval_function`. Set to 0 to + not evaluate periodically (but still after the last iteration). + eval_function (callable): a function which takes no arguments, and + returns a nested dict of evaluation metrics. + + Note: + This hook must be enabled in all or none workers. + If you would like only certain workers to perform evaluation, + give other workers a no-op function (`eval_function=lambda: None`). + """ + self._period = eval_period + self._func = eval_function + + def _do_eval(self): + results = self._func() + + if results: + assert isinstance( + results, dict + ), "Eval function must return a dict. Got {} instead.".format(results) + + flattened_results = flatten_results_dict(results) + for k, v in flattened_results.items(): + try: + v = float(v) + except Exception as e: + raise ValueError( + "[EvalHook] eval_function should return a nested dict of float. " + "Got '{}: {}' instead.".format(k, v) + ) from e + self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False) + + # Evaluation may take different time among workers. + # A barrier make them start the next iteration together. + comm.synchronize() + + def after_step(self): + next_iter = self.trainer.iter + 1 + if self._period > 0 and next_iter % self._period == 0: + self._do_eval() + + def after_train(self): + # This condition is to prevent the eval from running after a failed training + if self.trainer.iter + 1 >= self.trainer.max_iter: + self._do_eval() + # func is likely a closure that holds reference to the trainer + # therefore we clean it to avoid circular reference in the end + del self._func + + +class PreciseBN(HookBase): + """ + The standard implementation of BatchNorm uses EMA in inference, which is + sometimes suboptimal. + This class computes the true average of statistics rather than the moving average, + and put true averages to every BN layer in the given model. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def __init__(self, period, model, data_loader, num_iter): + """ + Args: + period (int): the period this hook is run, or 0 to not run during training. + The hook will always run in the end of training. + model (nn.Module): a module whose all BN layers in training mode will be + updated by precise BN. 
+ Note that user is responsible for ensuring the BN layers to be + updated are in training mode when this hook is triggered. + data_loader (iterable): it will produce data to be run by `model(data)`. + num_iter (int): number of iterations used to compute the precise + statistics. + """ + self._logger = logging.getLogger(__name__) + if len(get_bn_modules(model)) == 0: + self._logger.info( + "PreciseBN is disabled because model does not contain BN layers in training mode." + ) + self._disabled = True + return + + self._model = model + self._data_loader = data_loader + self._num_iter = num_iter + self._period = period + self._disabled = False + + self._data_iter = None + + def after_step(self): + next_iter = self.trainer.iter + 1 + is_final = next_iter == self.trainer.max_iter + if is_final or (self._period > 0 and next_iter % self._period == 0): + self.update_stats() + + def update_stats(self): + """ + Update the model with precise statistics. Users can manually call this method. + """ + if self._disabled: + return + + if self._data_iter is None: + self._data_iter = iter(self._data_loader) + + def data_loader(): + for num_iter in itertools.count(1): + if num_iter % 100 == 0: + self._logger.info( + "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter) + ) + # This way we can reuse the same iterator + yield next(self._data_iter) + + with EventStorage(): # capture events in a new storage to discard them + self._logger.info( + "Running precise-BN for {} iterations... ".format(self._num_iter) + + "Note that this could produce different statistics every time." + ) + update_bn_stats(self._model, data_loader(), self._num_iter) diff --git a/src/sts/detectron2/engine/launch.py b/src/sts/detectron2/engine/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..40dad262dd9929e6e4e9c60424b3fda1ab97318c --- /dev/null +++ b/src/sts/detectron2/engine/launch.py @@ -0,0 +1,125 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +from datetime import timedelta +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +from detectron2.utils import comm + +__all__ = ["DEFAULT_TIMEOUT", "launch"] + +DEFAULT_TIMEOUT = timedelta(minutes=30) + + +def _find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def launch( + main_func, + num_gpus_per_machine, + num_machines=1, + machine_rank=0, + dist_url=None, + args=(), + timeout=DEFAULT_TIMEOUT, +): + """ + Launch multi-gpu or distributed training. + This function must be called on all machines involved in the training. + It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. + + Args: + main_func: a function that will be called by `main_func(*args)` + num_gpus_per_machine (int): number of GPUs per machine + num_machines (int): the total number of machines + machine_rank (int): the rank of this machine + dist_url (str): url to connect to for distributed jobs, including protocol + e.g. "tcp://127.0.0.1:8686". 
+ Can be set to "auto" to automatically select a free port on localhost + timeout (timedelta): timeout of the distributed workers + args (tuple): arguments passed to main_func + """ + world_size = num_machines * num_gpus_per_machine + if world_size > 1: + # https://github.com/pytorch/pytorch/pull/14391 + # TODO prctl in spawned processes + + if dist_url == "auto": + assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." + port = _find_free_port() + dist_url = f"tcp://127.0.0.1:{port}" + if num_machines > 1 and dist_url.startswith("file://"): + logger = logging.getLogger(__name__) + logger.warning( + "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" + ) + + mp.spawn( + _distributed_worker, + nprocs=num_gpus_per_machine, + args=( + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + args, + timeout, + ), + daemon=False, + ) + else: + main_func(*args) + + +def _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + args, + timeout=DEFAULT_TIMEOUT, +): + assert torch.cuda.is_available(), "cuda is not available. Please check your installation." + global_rank = machine_rank * num_gpus_per_machine + local_rank + try: + dist.init_process_group( + backend="NCCL", + init_method=dist_url, + world_size=world_size, + rank=global_rank, + timeout=timeout, + ) + except Exception as e: + logger = logging.getLogger(__name__) + logger.error("Process group URL: {}".format(dist_url)) + raise e + # synchronize is needed here to prevent a possible timeout after calling init_process_group + # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 + comm.synchronize() + + assert num_gpus_per_machine <= torch.cuda.device_count() + torch.cuda.set_device(local_rank) + + # Setup the local process group (which contains ranks within the same machine) + assert comm._LOCAL_PROCESS_GROUP is None + num_machines = world_size // num_gpus_per_machine + for i in range(num_machines): + ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) + pg = dist.new_group(ranks_on_i) + if i == machine_rank: + comm._LOCAL_PROCESS_GROUP = pg + + main_func(*args) diff --git a/src/sts/detectron2/engine/train_loop.py b/src/sts/detectron2/engine/train_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..25292d70a44143e2f89da552f13631624dae3db4 --- /dev/null +++ b/src/sts/detectron2/engine/train_loop.py @@ -0,0 +1,343 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +import time +import weakref +from typing import Dict, List, Optional +import torch +from torch.nn.parallel import DataParallel, DistributedDataParallel + +import detectron2.utils.comm as comm +from detectron2.utils.events import EventStorage, get_event_storage +from detectron2.utils.logger import _log_api_usage + +__all__ = ["HookBase", "TrainerBase", "SimpleTrainer", "AMPTrainer"] + + +class HookBase: + """ + Base class for hooks that can be registered with :class:`TrainerBase`. + + Each hook can implement 4 methods. The way they are called is demonstrated + in the following snippet: + :: + hook.before_train() + for iter in range(start_iter, max_iter): + hook.before_step() + trainer.run_step() + hook.after_step() + iter += 1 + hook.after_train() + + Notes: + 1. 
In the hook method, users can access ``self.trainer`` to access more + properties about the context (e.g., model, current iteration, or config + if using :class:`DefaultTrainer`). + + 2. A hook that does something in :meth:`before_step` can often be + implemented equivalently in :meth:`after_step`. + If the hook takes non-trivial time, it is strongly recommended to + implement the hook in :meth:`after_step` instead of :meth:`before_step`. + The convention is that :meth:`before_step` should only take negligible time. + + Following this convention will allow hooks that do care about the difference + between :meth:`before_step` and :meth:`after_step` (e.g., timer) to + function properly. + + Attributes: + trainer (TrainerBase): A weak reference to the trainer object. Set by the trainer + when the hook is registered. + """ + + def before_train(self): + """ + Called before the first iteration. + """ + pass + + def after_train(self): + """ + Called after the last iteration. + """ + pass + + def before_step(self): + """ + Called before each iteration. + """ + pass + + def after_step(self): + """ + Called after each iteration. + """ + pass + + +class TrainerBase: + """ + Base class for iterative trainer with hooks. + + The only assumption we made here is: the training runs in a loop. + A subclass can implement what the loop is. + We made no assumptions about the existence of dataloader, optimizer, model, etc. + + Attributes: + iter(int): the current iteration. + + start_iter(int): The iteration to start with. + By convention the minimum possible value is 0. + + max_iter(int): The iteration to end training. + + storage(EventStorage): An EventStorage that's opened during the course of training. + """ + + def __init__(self) -> None: + self._hooks: List[HookBase] = [] + self.iter: int + self.start_iter: int + self.max_iter: int + self.storage: EventStorage + _log_api_usage("trainer." + self.__class__.__name__) + + def register_hooks(self, hooks: List[Optional[HookBase]]) -> None: + """ + Register hooks to the trainer. The hooks are executed in the order + they are registered. + + Args: + hooks (list[Optional[HookBase]]): list of hooks + """ + hooks = [h for h in hooks if h is not None] + for h in hooks: + assert isinstance(h, HookBase) + # To avoid circular reference, hooks and trainer cannot own each other. + # This normally does not matter, but will cause memory leak if the + # involved objects contain __del__: + # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ + h.trainer = weakref.proxy(self) + self._hooks.extend(hooks) + + def train(self, start_iter: int, max_iter: int): + """ + Args: + start_iter, max_iter (int): See docs above + """ + logger = logging.getLogger(__name__) + logger.info("Starting training from iteration {}".format(start_iter)) + + self.iter = self.start_iter = start_iter + self.max_iter = max_iter + + with EventStorage(start_iter) as self.storage: + try: + self.before_train() + for self.iter in range(start_iter, max_iter): + self.before_step() + self.run_step() + self.after_step() + # self.iter == max_iter can be used by `after_train` to + # tell whether the training successfully finished or failed + # due to exceptions. 
+ self.iter += 1 + except Exception: + logger.exception("Exception during training:") + raise + finally: + self.after_train() + + def before_train(self): + for h in self._hooks: + h.before_train() + + def after_train(self): + self.storage.iter = self.iter + for h in self._hooks: + h.after_train() + + def before_step(self): + # Maintain the invariant that storage.iter == trainer.iter + # for the entire execution of each step + self.storage.iter = self.iter + + for h in self._hooks: + h.before_step() + + def after_step(self): + for h in self._hooks: + h.after_step() + + def run_step(self): + raise NotImplementedError + + +class SimpleTrainer(TrainerBase): + """ + A simple trainer for the most common type of task: + single-cost single-optimizer single-data-source iterative optimization, + optionally using data-parallelism. + It assumes that every step, you: + + 1. Compute the loss with a data from the data_loader. + 2. Compute the gradients with the above loss. + 3. Update the model with the optimizer. + + All other tasks during training (checkpointing, logging, evaluation, LR schedule) + are maintained by hooks, which can be registered by :meth:`TrainerBase.register_hooks`. + + If you want to do anything fancier than this, + either subclass TrainerBase and implement your own `run_step`, + or write your own training loop. + """ + + def __init__(self, model, data_loader, optimizer): + """ + Args: + model: a torch Module. Takes a data from data_loader and returns a + dict of losses. + data_loader: an iterable. Contains data to be used to call model. + optimizer: a torch optimizer. + """ + super().__init__() + + """ + We set the model to training mode in the trainer. + However it's valid to train a model that's in eval mode. + If you want your model (or a submodule of it) to behave + like evaluation during training, you can overwrite its train() method. + """ + model.train() + + self.model = model + self.data_loader = data_loader + self._data_loader_iter = iter(data_loader) + self.optimizer = optimizer + + def run_step(self): + """ + Implement the standard training logic described above. + """ + assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" + start = time.perf_counter() + """ + If you want to do something with the data, you can wrap the dataloader. + """ + data = next(self._data_loader_iter) + data_time = time.perf_counter() - start + + """ + If you want to do something with the losses, you can wrap the model. + """ + loss_dict = self.model(data) + losses = sum(loss_dict.values()) + + """ + If you need to accumulate gradients or do something similar, you can + wrap the optimizer with your custom `zero_grad()` method. + """ + self.optimizer.zero_grad() + losses.backward() + + self._write_metrics(loss_dict, data_time) + + """ + If you need gradient clipping/scaling or other processing, you can + wrap the optimizer with your custom `step()` method. But it is + suboptimal as explained in https://arxiv.org/abs/2006.15704 Sec 3.2.4 + """ + self.optimizer.step() + + def _write_metrics( + self, + loss_dict: Dict[str, torch.Tensor], + data_time: float, + prefix: str = "", + ): + """ + Args: + loss_dict (dict): dict of scalar losses + data_time (float): time taken by the dataloader iteration + """ + metrics_dict = {k: v.detach().cpu().item() for k, v in loss_dict.items()} + metrics_dict["data_time"] = data_time + + # Gather metrics among all workers for logging + # This assumes we do DDP-style training, which is currently the only + # supported method in detectron2. 
+ all_metrics_dict = comm.gather(metrics_dict) + + if comm.is_main_process(): + storage = get_event_storage() + + # data_time among workers can have high variance. The actual latency + # caused by data_time is the maximum among workers. + data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) + storage.put_scalar("data_time", data_time) + + # average the rest metrics + metrics_dict = { + k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() + } + total_losses_reduced = sum(metrics_dict.values()) + if not np.isfinite(total_losses_reduced): + raise FloatingPointError( + f"Loss became infinite or NaN at iteration={self.iter}!\n" + f"loss_dict = {metrics_dict}" + ) + + storage.put_scalar("{}total_loss".format(prefix), total_losses_reduced) + if len(metrics_dict) > 1: + storage.put_scalars(**metrics_dict) + + +class AMPTrainer(SimpleTrainer): + """ + Like :class:`SimpleTrainer`, but uses PyTorch's native automatic mixed precision + in the training loop. + """ + + def __init__(self, model, data_loader, optimizer, grad_scaler=None): + """ + Args: + model, data_loader, optimizer: same as in :class:`SimpleTrainer`. + grad_scaler: torch GradScaler to automatically scale gradients. + """ + unsupported = "AMPTrainer does not support single-process multi-device training!" + if isinstance(model, DistributedDataParallel): + assert not (model.device_ids and len(model.device_ids) > 1), unsupported + assert not isinstance(model, DataParallel), unsupported + + super().__init__(model, data_loader, optimizer) + + if grad_scaler is None: + from torch.cuda.amp import GradScaler + + grad_scaler = GradScaler() + self.grad_scaler = grad_scaler + + def run_step(self): + """ + Implement the AMP training logic. + """ + assert self.model.training, "[AMPTrainer] model was changed to eval mode!" + assert torch.cuda.is_available(), "[AMPTrainer] CUDA is required for AMP training!" + from torch.cuda.amp import autocast + + start = time.perf_counter() + data = next(self._data_loader_iter) + data_time = time.perf_counter() - start + + with autocast(): + loss_dict = self.model(data) + losses = sum(loss_dict.values()) + + self.optimizer.zero_grad() + self.grad_scaler.scale(losses).backward() + + self._write_metrics(loss_dict, data_time) + + self.grad_scaler.step(self.optimizer) + self.grad_scaler.update() diff --git a/src/sts/detectron2/evaluation/__init__.py b/src/sts/detectron2/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a059c9cb3eb66f5e107721e30b5c9eda5122ec5 --- /dev/null +++ b/src/sts/detectron2/evaluation/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator +from .coco_evaluation import COCOEvaluator +from .rotated_coco_evaluation import RotatedCOCOEvaluator +from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset +from .lvis_evaluation import LVISEvaluator +from .panoptic_evaluation import COCOPanopticEvaluator +from .pascal_voc_evaluation import PascalVOCDetectionEvaluator +from .sem_seg_evaluation import SemSegEvaluator +from .testing import print_csv_format, verify_results +from .text_evaluation import TextEvaluator +from .text_eval_script import text_eval_main +from . 
import rrc_evaluation_funcs + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/detectron2/evaluation/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7913f3d3b9f9816efacc01eda8e7e449af44683 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/cityscapes_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/cityscapes_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..914c0f2db9ab7fb1f07718be5f45210dab900b03 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/cityscapes_evaluation.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/coco_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/coco_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..191fe045d5a3f2def4ee4c72e0afed02dc6fcf0a Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/coco_evaluation.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/evaluator.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/evaluator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eca48ef31e20c96d10e382a44eec69a9dc252e58 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/evaluator.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/fast_eval_api.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/fast_eval_api.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7466bc4bb14f4c7f83c654effd44d54303a7b5a Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/fast_eval_api.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/lvis_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/lvis_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad15f5fab83aecd58a8f3f4ce5ab1bed4c1c8260 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/lvis_evaluation.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/panoptic_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/panoptic_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6aa24c0d76fff313f1a54da265cac4c27c319052 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/panoptic_evaluation.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/pascal_voc_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/pascal_voc_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a8802dc51cfbfcc19df62c0b53f87009cec8d61 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/pascal_voc_evaluation.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/rotated_coco_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/rotated_coco_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10021b131ecfe399c34c19d5169bde1e479432ed Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/rotated_coco_evaluation.cpython-38.pyc differ diff --git 
a/src/sts/detectron2/evaluation/__pycache__/rrc_evaluation_funcs.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/rrc_evaluation_funcs.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd07c659b8bebe146967d2f7ac30e0cd937f3b68 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/rrc_evaluation_funcs.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/rrc_evaluation_funcs_ic15.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/rrc_evaluation_funcs_ic15.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03967d7be08c5d062e8cf20a8591d039560d27b6 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/rrc_evaluation_funcs_ic15.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/sem_seg_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/sem_seg_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a2ee12ddf3a3d725d307121c9dc1eff2f455f5d Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/sem_seg_evaluation.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/testing.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/testing.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ed73c3876f09ffa03ea7ccc9c99c40c13b8aa18 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/testing.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/text_eval_script.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/text_eval_script.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..418cc2192bb91b4ee5a1b3da0b4b99337cfcd2cd Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/text_eval_script.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/text_eval_script_ic15.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/text_eval_script_ic15.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a93257bf38fa1bf26149ae2d9e4c508fccd3d3d8 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/text_eval_script_ic15.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/__pycache__/text_evaluation.cpython-38.pyc b/src/sts/detectron2/evaluation/__pycache__/text_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15025d9d7b4bcf6de30c72802e804baa5cdad876 Binary files /dev/null and b/src/sts/detectron2/evaluation/__pycache__/text_evaluation.cpython-38.pyc differ diff --git a/src/sts/detectron2/evaluation/cityscapes_evaluation.py b/src/sts/detectron2/evaluation/cityscapes_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..3fb6c4cd5f752d639570d022cb23ce18491c370a --- /dev/null +++ b/src/sts/detectron2/evaluation/cityscapes_evaluation.py @@ -0,0 +1,194 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import glob +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +import torch +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class CityscapesEvaluator(DatasetEvaluator): + """ + Base class for evaluation using cityscapes API. 
+ """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): the name of the dataset. + It must have the following metadata associated with it: + "thing_classes", "gt_dir". + """ + self._metadata = MetadataCatalog.get(dataset_name) + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") + self._temp_dir = self._working_dir.name + # All workers will write to the same results directory + # TODO this does not work in distributed training + self._temp_dir = comm.all_gather(self._temp_dir)[0] + if self._temp_dir != self._working_dir.name: + self._working_dir.cleanup() + self._logger.info( + "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) + ) + + +class CityscapesInstanceEvaluator(CityscapesEvaluator): + """ + Evaluate instance segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import name2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") + + if "instances" in output: + output = output["instances"].to(self._cpu_device) + num_instances = len(output) + with open(pred_txt, "w") as fout: + for i in range(num_instances): + pred_class = output.pred_classes[i] + classes = self._metadata.thing_classes[pred_class] + class_id = name2label[classes].id + score = output.scores[i] + mask = output.pred_masks[i].numpy().astype("uint8") + png_filename = os.path.join( + self._temp_dir, basename + "_{}_{}.png".format(i, classes) + ) + + Image.fromarray(mask * 255).save(png_filename) + fout.write( + "{} {} {}\n".format(os.path.basename(png_filename), class_id, score) + ) + else: + # Cityscapes requires a prediction file for every ground truth image. + with open(pred_txt, "w") as fout: + pass + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP" and "AP50". + """ + comm.synchronize() + if comm.get_rank() > 0: + return + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + )["averages"] + + ret = OrderedDict() + ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} + self._working_dir.cleanup() + return ret + + +class CityscapesSemSegEvaluator(CityscapesEvaluator): + """ + Evaluate semantic segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import trainId2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") + + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() + pred = 255 * np.ones(output.shape, dtype=np.uint8) + for train_id, label in trainId2label.items(): + if label.ignoreInEval: + continue + pred[output == train_id] = label.id + Image.fromarray(pred).save(pred_filename) + + def evaluate(self): + comm.synchronize() + if comm.get_rank() > 0: + return + # Load the Cityscapes eval script *after* setting the required env var, + # since the script reads CITYSCAPES_DATASET into global variables at load time. + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + ) + ret = OrderedDict() + ret["sem_seg"] = { + "IoU": 100.0 * results["averageScoreClasses"], + "iIoU": 100.0 * results["averageScoreInstClasses"], + "IoU_sup": 100.0 * results["averageScoreCategories"], + "iIoU_sup": 100.0 * results["averageScoreInstCategories"], + } + self._working_dir.cleanup() + return ret diff --git a/src/sts/detectron2/evaluation/coco_evaluation.py b/src/sts/detectron2/evaluation/coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..e2131d7475a8649a86df2112a13ee6d187089161 --- /dev/null +++ b/src/sts/detectron2/evaluation/coco_evaluation.py @@ -0,0 +1,579 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import pycocotools.mask as mask_util +import torch +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import detectron2.utils.comm as comm +from detectron2.config import CfgNode +from detectron2.data import MetadataCatalog +from detectron2.data.datasets.coco import convert_to_coco_json +from detectron2.evaluation.fast_eval_api import COCOeval_opt +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import create_small_table + +from .evaluator import DatasetEvaluator + + +class COCOEvaluator(DatasetEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def __init__( + self, + dataset_name, + tasks=None, + distributed=True, + output_dir=None, + *, + use_fast_impl=True, + kpt_oks_sigmas=(), + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have either the following corresponding metadata: + + "json_file": the path to the COCO format annotation + + Or it must be in detectron2's standard dataset format + so it can be converted to COCO format automatically. + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm", "keypoints". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks and run evaluation + in the main process. + Otherwise, will only evaluate the results in the current process. + output_dir (str): optional, an output directory to dump all + results predicted on the dataset. The dump contains two files: + + 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and + contains all the results in the format they are produced by the model. + 2. "coco_instances_results.json" a json file in COCO's result format. + use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. + Although the results should be very close to the official implementation in COCO + API, it is still recommended to compute results with the official API for use in + papers. The faster implementation also uses more RAM. + kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. + See http://cocodataset.org/#keypoints-eval + When empty, it will use the defaults in COCO. + Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. + """ + self._logger = logging.getLogger(__name__) + self._distributed = distributed + self._output_dir = output_dir + self._use_fast_impl = use_fast_impl + + if tasks is not None and isinstance(tasks, CfgNode): + kpt_oks_sigmas = ( + tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas + ) + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." 
+ " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + self._logger.info( + f"'{dataset_name}' is not registered by `register_coco_instances`." + " Therefore trying to convert it to COCO format ..." + ) + + cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") + self._metadata.json_file = cache_path + convert_to_coco_json(dataset_name, cache_path) + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + # Test set json files do not contain annotations (evaluation must be + # performed using the COCO evaluation server). + self._do_evaluation = "annotations" in self._coco_api.dataset + if self._do_evaluation: + self._kpt_oks_sigmas = kpt_oks_sigmas + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + if len(prediction) > 1: + self._predictions.append(prediction) + + def evaluate(self, img_ids=None): + """ + Args: + img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset + """ + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return {} + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(predictions, img_ids=img_ids) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + """ + Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. + """ + tasks = {"bbox"} + for pred in predictions: + if "segmentation" in pred: + tasks.add("segm") + if "keypoints" in pred: + tasks.add("keypoints") + return sorted(tasks) + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. 
+ """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + num_classes = len(all_contiguous_ids) + assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + assert category_id < num_classes, ( + f"A prediction has class={category_id}, " + f"but the dataset only has {num_classes} classes and " + f"predicted class id should be in [0, {num_classes - 1}]." + ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. 
+ bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + def _derive_coco_results(self, coco_eval, iou_type, class_names=None): + """ + Derive the desired score numbers from summarized COCOeval. + + Args: + coco_eval (None or COCOEval): None represents no predictions from model. + iou_type (str): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], + }[iou_type] + + if coco_eval is None: + self._logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + # the standard metrics + results = { + metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") + for idx, metric in enumerate(metrics) + } + self._logger.info( + "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) + ) + if not np.isfinite(sum(results.values())): + self._logger.info("Some metrics cannot be computed and is shown as NaN.") + + if class_names is None or len(class_names) <= 1: + return results + # Compute per-category AP + # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa + precisions = coco_eval.eval["precision"] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) == precisions.shape[2] + + results_per_category = [] + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + results_per_category.append(("{}".format(name), float(ap * 100))) + + # tabulate it + N_COLS = min(6, len(results_per_category) * 2) + results_flatten = list(itertools.chain(*results_per_category)) + results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + results_2d, + tablefmt="pipe", + floatfmt=".3f", + headers=["category", "AP"] * (N_COLS // 2), + numalign="left", + ) + self._logger.info("Per-category {} AP: \n".format(iou_type) + table) + + results.update({"AP-" + name: ap for name, ap in results_per_category}) + return 
results + + +def instances_to_coco_json(instances, img_id): + """ + Dump an "Instances" object to a COCO-format json that's used for evaluation. + + Args: + instances (Instances): + img_id (int): the image id + + Returns: + list[dict]: list of json annotations in COCO format. + """ + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + has_mask = instances.has("pred_masks") + if has_mask: + # use RLE to encode the masks, because they are too large and takes memory + # since this evaluator stores outputs of the entire dataset + rles = [ + mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in instances.pred_masks + ] + for rle in rles: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which always produces strings cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the pycocotools/_mask.pyx does). + rle["counts"] = rle["counts"].decode("utf-8") + + has_keypoints = instances.has("pred_keypoints") + if has_keypoints: + keypoints = instances.pred_keypoints + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + if has_mask: + result["segmentation"] = rles[k] + if has_keypoints: + # In COCO annotations, + # keypoints coordinates are pixel indices. + # However our predictions are floating point coordinates. + # Therefore we subtract 0.5 to be consistent with the annotation format. + # This is the inverse of data loading logic in `datasets/coco.py`. + keypoints[k][:, :2] -= 0.5 + result["keypoints"] = keypoints[k].flatten().tolist() + results.append(result) + return results + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. 
+ """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) + anno = coco_api.loadAnns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + for obj in anno + if obj["iscrowd"] == 0 + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_coco( + coco_gt, coco_results, iou_type, kpt_oks_sigmas=None, use_fast_impl=True, img_ids=None +): + """ + Evaluate the coco results using COCOEval API. 
+ """ + assert len(coco_results) > 0 + + if iou_type == "segm": + coco_results = copy.deepcopy(coco_results) + # When evaluating mask AP, if the results contain bbox, cocoapi will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in coco_results: + c.pop("bbox", None) + + coco_dt = coco_gt.loadRes(coco_results) + coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type) + if img_ids is not None: + coco_eval.params.imgIds = img_ids + + if iou_type == "keypoints": + # Use the COCO default keypoint OKS sigmas unless overrides are specified + if kpt_oks_sigmas: + assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!" + coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) + # COCOAPI requires every detection and every gt to have keypoints, so + # we just take the first entry from both + num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 + num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 + num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) + assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( + f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " + f"Ground truth contains {num_keypoints_gt} keypoints. " + f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " + "They have to agree with each other. For meaning of OKS, please refer to " + "http://cocodataset.org/#keypoints-eval." + ) + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval diff --git a/src/sts/detectron2/evaluation/evaluator.py b/src/sts/detectron2/evaluation/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..07da68163e214d7fefac95868d95a91c953f8f37 --- /dev/null +++ b/src/sts/detectron2/evaluation/evaluator.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import datetime +import logging +import time +from collections import OrderedDict +from contextlib import ExitStack, contextmanager +import torch +from torch import nn + +from detectron2.utils.comm import get_world_size, is_main_process +from detectron2.utils.logger import log_every_n_seconds + + +class DatasetEvaluator: + """ + Base class for a dataset evaluator. + + The function :func:`inference_on_dataset` runs the model over + all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. + + This class will accumulate information of the inputs/outputs (by :meth:`process`), + and produce evaluation results in the end (by :meth:`evaluate`). + """ + + def reset(self): + """ + Preparation for a new round of evaluation. + Should be called before starting a round of evaluation. + """ + pass + + def process(self, inputs, outputs): + """ + Process the pair of inputs and outputs. + If they contain batches, the pairs can be consumed one-by-one using `zip`: + + .. code-block:: python + + for input_, output in zip(inputs, outputs): + # do evaluation on single input/output pair + ... + + Args: + inputs (list): the inputs that's used to call the model. + outputs (list): the return value of `model(inputs)` + """ + pass + + def evaluate(self): + """ + Evaluate/summarize the performance, after processing all input/output pairs. + + Returns: + dict: + A new evaluator class can return a dict of arbitrary format + as long as the user can process the results. 
+ In our train_net.py, we expect the following format: + + * key: the name of the task (e.g., bbox) + * value: a dict of {metric name: score}, e.g.: {"AP50": 80} + """ + pass + + +class DatasetEvaluators(DatasetEvaluator): + """ + Wrapper class to combine multiple :class:`DatasetEvaluator` instances. + + This class dispatches every evaluation call to + all of its :class:`DatasetEvaluator`. + """ + + def __init__(self, evaluators): + """ + Args: + evaluators (list): the evaluators to combine. + """ + super().__init__() + self._evaluators = evaluators + + def reset(self): + for evaluator in self._evaluators: + evaluator.reset() + + def process(self, inputs, outputs): + for evaluator in self._evaluators: + evaluator.process(inputs, outputs) + + def evaluate(self): + results = OrderedDict() + for evaluator in self._evaluators: + result = evaluator.evaluate() + if is_main_process() and result is not None: + for k, v in result.items(): + assert ( + k not in results + ), "Different evaluators produce results with the same key {}".format(k) + results[k] = v + return results + + +def inference_on_dataset(model, data_loader, evaluator): + """ + Run model on the data_loader and evaluate the metrics with evaluator. + Also benchmark the inference speed of `model.__call__` accurately. + The model will be used in eval mode. + + Args: + model (callable): a callable which takes an object from + `data_loader` and returns some outputs. + + If it's an nn.Module, it will be temporarily set to `eval` mode. + If you wish to evaluate a model in `training` mode instead, you can + wrap the given model and override its behavior of `.eval()` and `.train()`. + data_loader: an iterable object with a length. + The elements it generates will be the inputs to the model. + evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want + to benchmark, but don't want to do any evaluation. + + Returns: + The return value of `evaluator.evaluate()` + """ + num_devices = get_world_size() + logger = logging.getLogger(__name__) + logger.info("Start inference on {} images".format(len(data_loader))) + + total = len(data_loader) # inference data loader must have a fixed length + if evaluator is None: + # create a no-op evaluator + evaluator = DatasetEvaluators([]) + evaluator.reset() + + num_warmup = min(5, total - 1) + start_time = time.perf_counter() + total_compute_time = 0 + with ExitStack() as stack: + if isinstance(model, nn.Module): + stack.enter_context(inference_context(model)) + stack.enter_context(torch.no_grad()) + + for idx, inputs in enumerate(data_loader): + if idx == num_warmup: + start_time = time.perf_counter() + total_compute_time = 0 + + start_compute_time = time.perf_counter() + outputs = model(inputs) + if torch.cuda.is_available(): + torch.cuda.synchronize() + total_compute_time += time.perf_counter() - start_compute_time + evaluator.process(inputs, outputs) + iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) + seconds_per_img = total_compute_time / iters_after_start + if idx >= num_warmup * 2 or seconds_per_img > 5: + total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start + eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) + log_every_n_seconds( + logging.INFO, + "Inference done {}/{}. {:.4f} s / img. 
ETA={}".format( + idx + 1, total, seconds_per_img, str(eta) + ), + n=5, + ) + + # Measure the time only for this worker (before the synchronization barrier) + total_time = time.perf_counter() - start_time + total_time_str = str(datetime.timedelta(seconds=total_time)) + # NOTE this format is parsed by grep + logger.info( + "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( + total_time_str, total_time / (total - num_warmup), num_devices + ) + ) + total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) + logger.info( + "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( + total_compute_time_str, total_compute_time / (total - num_warmup), num_devices + ) + ) + results = evaluator.evaluate() + # An evaluator may return None when not in main process. + # Replace it by an empty dict instead to make it easier for downstream code to handle + if results is None: + results = {} + return results + + +@contextmanager +def inference_context(model): + """ + A context where the model is temporarily changed to eval mode, + and restored to previous mode afterwards. + + Args: + model: a torch Module + """ + training_mode = model.training + model.eval() + yield + model.train(training_mode) diff --git a/src/sts/detectron2/evaluation/fast_eval_api.py b/src/sts/detectron2/evaluation/fast_eval_api.py new file mode 100644 index 0000000000000000000000000000000000000000..2eb202bd5efa3ec3d366027b1debffc269ae8b17 --- /dev/null +++ b/src/sts/detectron2/evaluation/fast_eval_api.py @@ -0,0 +1,121 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import numpy as np +import time +from pycocotools.cocoeval import COCOeval + +from detectron2 import _C + +logger = logging.getLogger(__name__) + + +class COCOeval_opt(COCOeval): + """ + This is a slightly modified version of the original COCO API, where the functions evaluateImg() + and accumulate() are implemented in C++ to speedup evaluation + """ + + def evaluate(self): + """ + Run per image evaluation on given images and store results in self.evalImgs_cpp, a + datastructure that isn't readable from Python but is used by a c++ implementation of + accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure + self.evalImgs because this datastructure is a computational bottleneck. 
+ :return: None + """ + tic = time.time() + + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = "segm" if p.useSegm == 1 else "bbox" + logger.info("Evaluate annotation type *{}*".format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() # bottleneck + + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == "segm" or p.iouType == "bbox": + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds + } # bottleneck + + maxDet = p.maxDets[-1] + + # <<<< Beginning of code differences with original COCO API + def convert_instances_to_cpp(instances, is_det=False): + # Convert annotations for a list of instances in an image to a format that's fast + # to access in C++ + instances_cpp = [] + for instance in instances: + instance_cpp = _C.InstanceAnnotation( + int(instance["id"]), + instance["score"] if is_det else instance.get("score", 0.0), + instance["area"], + bool(instance.get("iscrowd", 0)), + bool(instance.get("ignore", 0)), + ) + instances_cpp.append(instance_cpp) + return instances_cpp + + # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ + ground_truth_instances = [ + [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] + for imgId in p.imgIds + ] + detected_instances = [ + [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] + for imgId in p.imgIds + ] + ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] + + if not p.useCats: + # For each image, flatten per-category lists into a single list + ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances] + detected_instances = [[[o for c in i for o in c]] for i in detected_instances] + + # Call C++ implementation of self.evaluateImgs() + self._evalImgs_cpp = _C.COCOevalEvaluateImages( + p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances + ) + self._evalImgs = None + + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + logger.info("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) + # >>>> End of code differences with original COCO API + + def accumulate(self): + """ + Accumulate per image evaluation results and store the result in self.eval. Does not + support changing parameter settings from those used by self.evaluate() + """ + logger.info("Accumulating evaluation results...") + tic = time.time() + assert hasattr( + self, "_evalImgs_cpp" + ), "evaluate() must be called before accmulate() is called." 
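Note (not part of the patch): `COCOeval_opt` above is a drop-in replacement for pycocotools' `COCOeval`, with `evaluate()` and `accumulate()` offloaded to the compiled `detectron2._C` extension. A usage sketch follows, assuming detectron2 has been built with its C++ extension; the annotation and result file paths are hypothetical.

```python
# Usage sketch: COCOeval_opt behaves like pycocotools' COCOeval, only faster.
from pycocotools.coco import COCO
from detectron2.evaluation.fast_eval_api import COCOeval_opt  # needs the compiled _C extension

coco_gt = COCO("annotations/instances_val.json")                    # hypothetical path
coco_dt = coco_gt.loadRes("outputs/coco_instances_results.json")    # hypothetical path

coco_eval = COCOeval_opt(coco_gt, coco_dt, iouType="bbox")
coco_eval.evaluate()    # per-image evaluation runs in C++
coco_eval.accumulate()  # accumulation also runs in C++
coco_eval.summarize()   # prints the standard COCO AP/AR table
```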
+ + self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) + + # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections + self.eval["recall"] = np.array(self.eval["recall"]).reshape( + self.eval["counts"][:1] + self.eval["counts"][2:] + ) + + # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X + # num_area_ranges X num_max_detections + self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"]) + self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) + toc = time.time() + logger.info("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic)) diff --git a/src/sts/detectron2/evaluation/lvis_evaluation.py b/src/sts/detectron2/evaluation/lvis_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..81f08e40cf61f0c451e63565debac7f6877b99d9 --- /dev/null +++ b/src/sts/detectron2/evaluation/lvis_evaluation.py @@ -0,0 +1,358 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import json +import logging +import os +import pickle +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.config import CfgNode +from detectron2.data import MetadataCatalog +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import create_small_table + +from .coco_evaluation import instances_to_coco_json +from .evaluator import DatasetEvaluator + + +class LVISEvaluator(DatasetEvaluator): + """ + Evaluate object proposal and instance detection/segmentation outputs using + LVIS's metrics and evaluation API. + """ + + def __init__(self, dataset_name, tasks=None, distributed=True, output_dir=None): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have the following corresponding metadata: + "json_file": the path to the LVIS format annotation + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + output_dir (str): optional, an output directory to dump results. + """ + from lvis import LVIS + + self._logger = logging.getLogger(__name__) + + if tasks is not None and isinstance(tasks, CfgNode): + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." + ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + json_file = PathManager.get_local_path(self._metadata.json_file) + self._lvis_api = LVIS(json_file) + # Test set json files do not contain annotations (evaluation must be + # performed using the LVIS evaluation server). + self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0 + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". 
+ outputs: the outputs of a LVIS model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def evaluate(self): + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(predictions) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + for pred in predictions: + if "segmentation" in pred: + return ("bbox", "segm") + return ("bbox",) + + def _eval_predictions(self, predictions): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + + Args: + predictions (list[dict]): list of outputs from the model + """ + self._logger.info("Preparing results in the LVIS format ...") + lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(lvis_results) + + # LVIS evaluator can be used to evaluate results for COCO dataset categories. + # In this case `_metadata` variable will have a field with COCO-specific category mapping. + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in lvis_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + else: + # unmap the category ids for LVIS (from 0-indexed to 1-indexed) + for result in lvis_results: + result["category_id"] += 1 + + if self._output_dir: + file_path = os.path.join(self._output_dir, "lvis_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(lvis_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + for task in sorted(tasks): + res = _evaluate_predictions_on_lvis( + self._lvis_api, lvis_results, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. 
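Note (not part of the patch): the proposal-dumping branch just below (shared verbatim with `COCOEvaluator._eval_box_proposals`) serializes per-image proposal boxes to `box_proposals.pkl` in XYXY_ABS coordinates. A sketch of that file's layout, with made-up numbers, is shown here.

```python
# Sketch of the box_proposals.pkl structure written by _eval_box_proposals.
import pickle
import numpy as np

proposal_data = {
    "boxes": [np.array([[0.0, 0.0, 50.0, 80.0]])],  # one (N, 4) XYXY_ABS array per image
    "objectness_logits": [np.array([2.3])],         # one (N,) array per image, same order
    "ids": [42],                                    # image ids, same order
    "bbox_mode": 0,                                 # BoxMode.XYXY_ABS.value
}
with open("box_proposals.pkl", "wb") as f:
    pickle.dump(proposal_data, f)

with open("box_proposals.pkl", "rb") as f:
    loaded = pickle.load(f)
print(loaded["boxes"][0].shape, loaded["ids"])  # (1, 4) [42]
```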
+ bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official LVIS API recall evaluation code. However, + it produces slightly different results. + """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) + anno = lvis_api.load_anns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + 
max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None): + """ + Args: + iou_type (str): + kpt_oks_sigmas (list[float]): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + }[iou_type] + + logger = logging.getLogger(__name__) + + if len(lvis_results) == 0: # TODO: check if needed + logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + if iou_type == "segm": + lvis_results = copy.deepcopy(lvis_results) + # When evaluating mask AP, if the results contain bbox, LVIS API will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in lvis_results: + c.pop("bbox", None) + + from lvis import LVISEval, LVISResults + + lvis_results = LVISResults(lvis_gt, lvis_results) + lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) + lvis_eval.run() + lvis_eval.print_results() + + # Pull the standard metrics from the LVIS results + results = lvis_eval.get_results() + results = {metric: float(results[metric] * 100) for metric in metrics} + logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) + return results diff --git a/src/sts/detectron2/evaluation/panoptic_evaluation.py b/src/sts/detectron2/evaluation/panoptic_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb3462b7f9abf6feaa499976bfed526ebd17e31 --- /dev/null +++ b/src/sts/detectron2/evaluation/panoptic_evaluation.py @@ -0,0 +1,199 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
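Note (not part of the patch): the tail of `_evaluate_box_proposals` above turns the per-ground-truth best-IoU values into recall at IoU thresholds 0.50:0.05:0.95 and averages them into AR. A worked toy example of that computation follows; the `gt_overlaps` values are invented.

```python
# Toy walk-through of the recall/AR computation at the end of _evaluate_box_proposals.
import torch

gt_overlaps = torch.tensor([0.3, 0.56, 0.72, 0.91])  # hypothetical best IoU per GT box
num_pos = len(gt_overlaps)

thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05)    # 0.50, 0.55, ..., 0.95
recalls = torch.zeros_like(thresholds)
for i, t in enumerate(thresholds):
    recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)

ar = recalls.mean()  # average recall over all IoU thresholds; ~0.40 for this toy input
print([round(r.item(), 2) for r in recalls], round(ar.item(), 3))
```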
+import contextlib +import io +import itertools +import json +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +from typing import Optional +from PIL import Image +from tabulate import tabulate + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + +logger = logging.getLogger(__name__) + + +class COCOPanopticEvaluator(DatasetEvaluator): + """ + Evaluate Panoptic Quality metrics on COCO using PanopticAPI. + It saves panoptic segmentation prediction in `output_dir` + + It contains a synchronize call and has to be called from all workers. + """ + + def __init__(self, dataset_name: str, output_dir: Optional[str] = None): + """ + Args: + dataset_name: name of the dataset + output_dir: output directory to save results for evaluation. + """ + self._metadata = MetadataCatalog.get(dataset_name) + self._thing_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + self._stuff_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items() + } + + self._output_dir = output_dir + if self._output_dir is not None: + PathManager.mkdirs(self._output_dir) + + def reset(self): + self._predictions = [] + + def _convert_category_id(self, segment_info): + isthing = segment_info.pop("isthing", None) + if isthing is None: + # the model produces panoptic category id directly. No more conversion needed + return segment_info + if isthing is True: + segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + return segment_info + + def process(self, inputs, outputs): + from panopticapi.utils import id2rgb + + for input, output in zip(inputs, outputs): + panoptic_img, segments_info = output["panoptic_seg"] + panoptic_img = panoptic_img.cpu().numpy() + if segments_info is None: + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label, and add 1 to panoptic_img since the official + # evaluation script uses 0 for VOID label. + label_divisor = self._metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_img): + if panoptic_label == -1: + # VOID region. + continue + pred_class = panoptic_label // label_divisor + isthing = ( + pred_class in self._metadata.thing_dataset_id_to_contiguous_id.values() + ) + segments_info.append( + { + "id": int(panoptic_label) + 1, + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + # Official evaluation script uses 0 for VOID label. 
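Note (not part of the patch): when the model emits a raw `panoptic_seg` id map instead of `segments_info`, the branch above rebuilds segment metadata from the `category_id * label_divisor + instance_id` encoding, treating -1 as VOID and shifting ids by +1 so the official script's 0-for-VOID convention holds. A stand-alone sketch with an invented `label_divisor` and a 2x2 toy id map:

```python
# Sketch of decoding a panoptic id map into segments_info, as done in
# COCOPanopticEvaluator.process when segments_info is None.
import numpy as np

label_divisor = 1000                      # hypothetical value of metadata.label_divisor
panoptic_img = np.array([[-1, 2003],      # -1 = VOID; 2003 = category 2, instance 3
                         [2003, 17001]])  # 17001 = category 17, instance 1

segments_info = []
for panoptic_label in np.unique(panoptic_img):
    if panoptic_label == -1:
        continue  # skip VOID pixels
    segments_info.append(
        {
            "id": int(panoptic_label) + 1,                    # +1: official script uses 0 for VOID
            "category_id": int(panoptic_label) // label_divisor,
        }
    )
print(segments_info)
# [{'id': 2004, 'category_id': 2}, {'id': 17002, 'category_id': 17}]
```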
+ panoptic_img += 1 + + file_name = os.path.basename(input["file_name"]) + file_name_png = os.path.splitext(file_name)[0] + ".png" + with io.BytesIO() as out: + Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG") + segments_info = [self._convert_category_id(x) for x in segments_info] + self._predictions.append( + { + "image_id": input["image_id"], + "file_name": file_name_png, + "png_string": out.getvalue(), + "segments_info": segments_info, + } + ) + + def evaluate(self): + comm.synchronize() + + self._predictions = comm.gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not comm.is_main_process(): + return + + # PanopticApi requires local files + gt_json = PathManager.get_local_path(self._metadata.panoptic_json) + gt_folder = PathManager.get_local_path(self._metadata.panoptic_root) + + with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: + logger.info("Writing all panoptic predictions to {} ...".format(pred_dir)) + for p in self._predictions: + with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: + f.write(p.pop("png_string")) + + with open(gt_json, "r") as f: + json_data = json.load(f) + json_data["annotations"] = self._predictions + + output_dir = self._output_dir or pred_dir + predictions_json = os.path.join(output_dir, "predictions.json") + with PathManager.open(predictions_json, "w") as f: + f.write(json.dumps(json_data)) + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + gt_json, + PathManager.get_local_path(predictions_json), + gt_folder=gt_folder, + pred_folder=pred_dir, + ) + + res = {} + res["PQ"] = 100 * pq_res["All"]["pq"] + res["SQ"] = 100 * pq_res["All"]["sq"] + res["RQ"] = 100 * pq_res["All"]["rq"] + res["PQ_th"] = 100 * pq_res["Things"]["pq"] + res["SQ_th"] = 100 * pq_res["Things"]["sq"] + res["RQ_th"] = 100 * pq_res["Things"]["rq"] + res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] + res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] + res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] + + results = OrderedDict({"panoptic_seg": res}) + _print_panoptic_results(pq_res) + + return results + + +def _print_panoptic_results(pq_res): + headers = ["", "PQ", "SQ", "RQ", "#categories"] + data = [] + for name in ["All", "Things", "Stuff"]: + row = [name] + [pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]] + data.append(row) + table = tabulate( + data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center" + ) + logger.info("Panoptic Evaluation Results:\n" + table) + + +if __name__ == "__main__": + from detectron2.utils.logger import setup_logger + + logger = setup_logger() + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--gt-json") + parser.add_argument("--gt-dir") + parser.add_argument("--pred-json") + parser.add_argument("--pred-dir") + args = parser.parse_args() + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir + ) + _print_panoptic_results(pq_res) diff --git a/src/sts/detectron2/evaluation/pascal_voc_evaluation.py b/src/sts/detectron2/evaluation/pascal_voc_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..1d1abcde2f87bb5f103e73cb364aaabbecb6e619 --- /dev/null +++ b/src/sts/detectron2/evaluation/pascal_voc_evaluation.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- +# Copyright 
(c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +import os +import tempfile +import xml.etree.ElementTree as ET +from collections import OrderedDict, defaultdict +from functools import lru_cache +import torch + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class PascalVOCDetectionEvaluator(DatasetEvaluator): + """ + Evaluate Pascal VOC style AP for Pascal VOC dataset. + It contains a synchronization, therefore has to be called from all ranks. + + Note that the concept of AP can be implemented in different ways and may not + produce identical results. This class mimics the implementation of the official + Pascal VOC Matlab API, and should produce similar but not identical results to the + official API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): name of the dataset, e.g., "voc_2007_test" + """ + self._dataset_name = dataset_name + meta = MetadataCatalog.get(dataset_name) + + # Too many tiny files, download all to local for speed. + annotation_dir_local = PathManager.get_local_path( + os.path.join(meta.dirname, "Annotations/") + ) + self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml") + self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") + self._class_names = meta.thing_classes + assert meta.year in [2007, 2012], meta.year + self._is_2007 = meta.year == 2007 + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._predictions = defaultdict(list) # class name -> list of prediction strings + + def process(self, inputs, outputs): + for input, output in zip(inputs, outputs): + image_id = input["image_id"] + instances = output["instances"].to(self._cpu_device) + boxes = instances.pred_boxes.tensor.numpy() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + for box, score, cls in zip(boxes, scores, classes): + xmin, ymin, xmax, ymax = box + # The inverse of data loading logic in `datasets/pascal_voc.py` + xmin += 1 + ymin += 1 + self._predictions[cls].append( + f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" + ) + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". + """ + all_predictions = comm.gather(self._predictions, dst=0) + if not comm.is_main_process(): + return + predictions = defaultdict(list) + for predictions_per_rank in all_predictions: + for clsid, lines in predictions_per_rank.items(): + predictions[clsid].extend(lines) + del all_predictions + + self._logger.info( + "Evaluating {} using {} metric. 
" + "Note that results do not use the official Matlab API.".format( + self._dataset_name, 2007 if self._is_2007 else 2012 + ) + ) + + with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: + res_file_template = os.path.join(dirname, "{}.txt") + + aps = defaultdict(list) # iou -> ap per class + for cls_id, cls_name in enumerate(self._class_names): + lines = predictions.get(cls_id, [""]) + + with open(res_file_template.format(cls_name), "w") as f: + f.write("\n".join(lines)) + + for thresh in range(50, 100, 5): + rec, prec, ap = voc_eval( + res_file_template, + self._anno_file_template, + self._image_set_path, + cls_name, + ovthresh=thresh / 100.0, + use_07_metric=self._is_2007, + ) + aps[thresh].append(ap * 100) + + ret = OrderedDict() + mAP = {iou: np.mean(x) for iou, x in aps.items()} + ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} + return ret + + +############################################################################## +# +# Below code is modified from +# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py +# -------------------------------------------------------- +# Fast/er R-CNN +# Licensed under The MIT License [see LICENSE for details] +# Written by Bharath Hariharan +# -------------------------------------------------------- + +"""Python implementation of the PASCAL VOC devkit's AP evaluation code.""" + + +@lru_cache(maxsize=None) +def parse_rec(filename): + """Parse a PASCAL VOC xml file.""" + with PathManager.open(filename) as f: + tree = ET.parse(f) + objects = [] + for obj in tree.findall("object"): + obj_struct = {} + obj_struct["name"] = obj.find("name").text + obj_struct["pose"] = obj.find("pose").text + obj_struct["truncated"] = int(obj.find("truncated").text) + obj_struct["difficult"] = int(obj.find("difficult").text) + bbox = obj.find("bndbox") + obj_struct["bbox"] = [ + int(bbox.find("xmin").text), + int(bbox.find("ymin").text), + int(bbox.find("xmax").text), + int(bbox.find("ymax").text), + ] + objects.append(obj_struct) + + return objects + + +def voc_ap(rec, prec, use_07_metric=False): + """Compute VOC AP given precision and recall. If use_07_metric is true, uses + the VOC 07 11-point method (default:False). + """ + if use_07_metric: + # 11 point metric + ap = 0.0 + for t in np.arange(0.0, 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11.0 + else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], rec, [1.0])) + mpre = np.concatenate(([0.0], prec, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): + """rec, prec, ap = voc_eval(detpath, + annopath, + imagesetfile, + classname, + [ovthresh], + [use_07_metric]) + + Top level function that does the PASCAL VOC evaluation. + + detpath: Path to detections + detpath.format(classname) should produce the detection results file. + annopath: Path to annotations + annopath.format(imagename) should be the xml annotations file. + imagesetfile: Text file containing the list of images, one image per line. 
+ classname: Category name (duh) + [ovthresh]: Overlap threshold (default = 0.5) + [use_07_metric]: Whether to use VOC07's 11 point AP computation + (default False) + """ + # assumes detections are in detpath.format(classname) + # assumes annotations are in annopath.format(imagename) + # assumes imagesetfile is a text file with each line an image name + + # first load gt + # read list of images + with PathManager.open(imagesetfile, "r") as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + + # load annots + recs = {} + for imagename in imagenames: + recs[imagename] = parse_rec(annopath.format(imagename)) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj["name"] == classname] + bbox = np.array([x["bbox"] for x in R]) + difficult = np.array([x["difficult"] for x in R]).astype(np.bool) + # difficult = np.array([False for x in R]).astype(np.bool) # treat all "difficult" as GT + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det} + + # read dets + detfile = detpath.format(classname) + with open(detfile, "r") as f: + lines = f.readlines() + + splitlines = [x.strip().split(" ") for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R["bbox"].astype(float) + + if BBGT.size > 0: + # compute overlaps + # intersection + ixmin = np.maximum(BBGT[:, 0], bb[0]) + iymin = np.maximum(BBGT[:, 1], bb[1]) + ixmax = np.minimum(BBGT[:, 2], bb[2]) + iymax = np.minimum(BBGT[:, 3], bb[3]) + iw = np.maximum(ixmax - ixmin + 1.0, 0.0) + ih = np.maximum(iymax - iymin + 1.0, 0.0) + inters = iw * ih + + # union + uni = ( + (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0) + + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) + - inters + ) + + overlaps = inters / uni + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + + if ovmax > ovthresh: + if not R["difficult"][jmax]: + if not R["det"][jmax]: + tp[d] = 1.0 + R["det"][jmax] = 1 + else: + fp[d] = 1.0 + else: + fp[d] = 1.0 + + # compute precision recall + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec, use_07_metric) + + return rec, prec, ap diff --git a/src/sts/detectron2/evaluation/rotated_coco_evaluation.py b/src/sts/detectron2/evaluation/rotated_coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..ea6d1b381dcf106339a03f08577df673ad439c46 --- /dev/null +++ b/src/sts/detectron2/evaluation/rotated_coco_evaluation.py @@ -0,0 +1,207 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
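`voc_ap` in the `pascal_voc_evaluation.py` diff above supports two AP definitions: the VOC2007 11-point interpolation (sample the best precision at recall >= t for t = 0.0, 0.1, ..., 1.0 and average) and the later exact area under a monotonically non-increasing precision envelope. A standalone sketch of the same logic with a toy precision/recall curve (the function name and toy numbers are mine, added for illustration only):

```python
import numpy as np

def voc_ap_sketch(rec: np.ndarray, prec: np.ndarray, use_07_metric: bool = False) -> float:
    if use_07_metric:
        # VOC2007: average of the max precision at 11 evenly spaced recall levels.
        return sum(
            (np.max(prec[rec >= t]) if np.sum(rec >= t) > 0 else 0.0) / 11.0
            for t in np.arange(0.0, 1.1, 0.1)
        )
    # VOC2010+: pad with sentinels, enforce a non-increasing precision envelope,
    # then integrate precision over the recall steps where recall actually changes.
    mrec = np.concatenate(([0.0], rec, [1.0]))
    mpre = np.concatenate(([0.0], prec, [0.0]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))

# Toy curve: one GT box, two detections sorted by confidence (a true positive, then a false positive).
print(voc_ap_sketch(np.array([1.0, 1.0]), np.array([1.0, 0.5])))  # 1.0
```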
+import itertools +import json +import numpy as np +import os +import torch +from pycocotools.cocoeval import COCOeval, maskUtils + +from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.file_io import PathManager + +from .coco_evaluation import COCOEvaluator + + +class RotatedCOCOeval(COCOeval): + @staticmethod + def is_rotated(box_list): + if type(box_list) == np.ndarray: + return box_list.shape[1] == 5 + elif type(box_list) == list: + if box_list == []: # cannot decide the box_dim + return False + return np.all( + np.array( + [ + (len(obj) == 5) and ((type(obj) == list) or (type(obj) == np.ndarray)) + for obj in box_list + ] + ) + ) + return False + + @staticmethod + def boxlist_to_tensor(boxlist, output_box_dim): + if type(boxlist) == np.ndarray: + box_tensor = torch.from_numpy(boxlist) + elif type(boxlist) == list: + if boxlist == []: + return torch.zeros((0, output_box_dim), dtype=torch.float32) + else: + box_tensor = torch.FloatTensor(boxlist) + else: + raise Exception("Unrecognized boxlist type") + + input_box_dim = box_tensor.shape[1] + if input_box_dim != output_box_dim: + if input_box_dim == 4 and output_box_dim == 5: + box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) + else: + raise Exception( + "Unable to convert from {}-dim box to {}-dim box".format( + input_box_dim, output_box_dim + ) + ) + return box_tensor + + def compute_iou_dt_gt(self, dt, gt, is_crowd): + if self.is_rotated(dt) or self.is_rotated(gt): + # TODO: take is_crowd into consideration + assert all(c == 0 for c in is_crowd) + dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5)) + gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5)) + return pairwise_iou_rotated(dt, gt) + else: + # This is the same as the classical COCO evaluation + return maskUtils.iou(dt, gt, is_crowd) + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId, catId] + dt = self._dts[imgId, catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] + if len(gt) == 0 and len(dt) == 0: + return [] + inds = np.argsort([-d["score"] for d in dt], kind="mergesort") + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt = dt[0 : p.maxDets[-1]] + + assert p.iouType == "bbox", "unsupported iouType for iou computation" + + g = [g["bbox"] for g in gt] + d = [d["bbox"] for d in dt] + + # compute iou between each dt and gt region + iscrowd = [int(o["iscrowd"]) for o in gt] + + # Note: this function is copied from cocoeval.py in cocoapi + # and the major difference is here. + ious = self.compute_iou_dt_gt(d, g, iscrowd) + return ious + + +class RotatedCOCOEvaluator(COCOEvaluator): + """ + Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs, + with rotated boxes support. + Note: this uses IOU only and does not consider angle differences. + """ + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. 
+ """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + + prediction["instances"] = self.instances_to_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def instances_to_json(self, instances, img_id): + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + if boxes.shape[1] == 4: + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + + results.append(result) + return results + + def _eval_predictions(self, predictions, img_ids=None): # img_ids: unused + """ + Evaluate predictions on the given tasks. + Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in coco_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + + assert self._tasks is None or set(self._tasks) == { + "bbox" + }, "[RotatedCOCOEvaluator] Only bbox evaluation is supported" + coco_eval = ( + self._evaluate_predictions_on_coco(self._coco_api, coco_results) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + task = "bbox" + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _evaluate_predictions_on_coco(self, coco_gt, coco_results): + """ + Evaluate the coco results using COCOEval API. 
+ """ + assert len(coco_results) > 0 + + coco_dt = coco_gt.loadRes(coco_results) + + # Only bbox is supported for now + coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox") + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval diff --git a/src/sts/detectron2/evaluation/rrc_evaluation_funcs.py b/src/sts/detectron2/evaluation/rrc_evaluation_funcs.py new file mode 100644 index 0000000000000000000000000000000000000000..0ed93d79af857c11a92efbb7d3cd058d519ac124 --- /dev/null +++ b/src/sts/detectron2/evaluation/rrc_evaluation_funcs.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python2 +#encoding: UTF-8 +import json +import sys;sys.path.append('./') +import zipfile +import re +import sys +import os +import codecs +import importlib +from io import StringIO + +from shapely.geometry import * + +def print_help(): + sys.stdout.write('Usage: python %s.py -g= -s= [-o= -p=]' %sys.argv[0]) + sys.exit(2) + + +def load_zip_file_keys(file,fileNameRegExp=''): + """ + Returns an array with the entries of the ZIP file that match with the regular expression. + The key's are the names or the file or the capturing group definied in the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive.') + + pairs = [] + + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( keyName ) + + return pairs + + +def load_zip_file(file,fileNameRegExp='',allEntries=False): + """ + Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file. + The key's are the names or the file or the capturing group definied in the fileNameRegExp + allEntries validates that all entries in the ZIP file pass the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive') + + pairs = [] + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( [ keyName , archive.read(name)] ) + else: + if allEntries: + raise Exception('ZIP entry not valid: %s' %name) + + return dict(pairs) + +def decode_utf8(raw): + """ + Returns a Unicode object on success, or None on failure + """ + try: + raw = codecs.decode(raw,'utf-8', 'replace') + #extracts BOM if exists + raw = raw.encode('utf8') + if raw.startswith(codecs.BOM_UTF8): + raw = raw.replace(codecs.BOM_UTF8, '', 1) + return raw.decode('utf-8') + except: + return None + +def validate_lines_in_file_gt(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + This function validates that all lines of the file calling the Line validation function for each line + """ + utf8File = decode_utf8(file_contents) + if (utf8File is None) : + raise Exception("The file %s is not UTF-8" %fileName) + + lines = utf8File.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != ""): + try: + validate_tl_line_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + except Exception as e: + raise Exception(("Line in sample not valid. 
Sample: %s Line: %s Error: %s" %(fileName,line,str(e))).encode('utf-8', 'replace')) + +def validate_lines_in_file(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + This function validates that all lines of the file calling the Line validation function for each line + """ + utf8File = decode_utf8(file_contents) + if (utf8File is None) : + raise Exception("The file %s is not UTF-8" %fileName) + + lines = utf8File.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != ""): + try: + validate_tl_line(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + except Exception as e: + raise Exception(("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))).encode('utf-8', 'replace')) + +def validate_tl_line_gt(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + """ + get_tl_line_values_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + +def validate_tl_line(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + """ + get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + +def get_tl_line_values_gt(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + Returns values from a textline. Points , [Confidences], [Transcriptions] + """ + confidence = 0.0 + transcription = ""; + points = [] + + if LTRB: + # do not use + raise Exception('Not implemented.') + + else: + # if withTranscription and withConfidence: + # cors = line.split(',') + # assert(len(cors)%2 -2 == 0), 'num cors should be even.' + # try: + # points = [ float(ic) for ic in cors[:-2]] + # except Exception as e: + # raise(e) + # elif withConfidence: + # cors = line.split(',') + # assert(len(cors)%2 -1 == 0), 'num cors should be even.' + # try: + # points = [ float(ic) for ic in cors[:-1]] + # except Exception as e: + # raise(e) + # elif withTranscription: + # cors = line.split(',') + # assert(len(cors)%2 -1 == 0), 'num cors should be even.' + # try: + # points = [ float(ic) for ic in cors[:-1]] + # except Exception as e: + # raise(e) + # else: + # cors = line.split(',') + # assert(len(cors)%2 == 0), 'num cors should be even.' 
+ # try: + # points = [ float(ic) for ic in cors[:]] + # except Exception as e: + # raise(e) + + if withTranscription and withConfidence: + raise('not implemented') + elif withConfidence: + raise('not implemented') + elif withTranscription: + ptr = line.strip().split(',####') + cors = ptr[0].split(',') + recs = ptr[1].strip() + assert(len(cors)%2 == 0), 'num cors should be even.' + try: + points = [ float(ic) for ic in cors[:]] + except Exception as e: + raise(e) + else: + raise('not implemented') + + validate_clockwise_points(points) + + if (imWidth>0 and imHeight>0): + for ip in range(0, len(points), 2): + validate_point_inside_bounds(points[ip],points[ip+1],imWidth,imHeight); + + + if withConfidence: + try: + confidence = 1.0 + except ValueError: + raise Exception("Confidence value must be a float") + + if withTranscription: + # posTranscription = numPoints + (2 if withConfidence else 1) + # transcription = cors[-1].strip() + transcription = recs + m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription) + if m2 != None : #Transcription with double quotes, we extract the value and replace escaped characters + transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"") + + return points,confidence,transcription + +def get_tl_line_values(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + Returns values from a textline. Points , [Confidences], [Transcriptions] + """ + confidence = 0.0 + transcription = ""; + points = [] + + if LTRB: + # do not use + raise Exception('Not implemented.') + + else: + if withTranscription and withConfidence: + raise('not implemented') + elif withConfidence: + raise('not implemented') + elif withTranscription: + ptr = line.strip().split(',####') + cors = ptr[0].split(',') + recs = ptr[1].strip() + assert(len(cors)%2 == 0), 'num cors should be even.' + try: + points = [ float(ic) for ic in cors[:]] + except Exception as e: + raise(e) + else: + raise('not implemented') + + # print('det clock wise') + validate_clockwise_points(points) + + if (imWidth>0 and imHeight>0): + for ip in range(0, len(points), 2): + validate_point_inside_bounds(points[ip],points[ip+1],imWidth,imHeight); + + + if withConfidence: + try: + confidence = 1.0 + except ValueError: + raise Exception("Confidence value must be a float") + + if withTranscription: + # posTranscription = numPoints + (2 if withConfidence else 1) + transcription = recs + m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription) + if m2 != None : #Transcription with double quotes, we extract the value and replace escaped characters + transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"") + + return points,confidence,transcription + + +def validate_point_inside_bounds(x,y,imWidth,imHeight): + if(x<0 or x>imWidth): + raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %(xmin,imWidth,imHeight)) + if(y<0 or y>imHeight): + raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s) Sample: %s Line:%s" %(ymin,imWidth,imHeight)) + +def validate_clockwise_points(points): + """ + Validates that the points that the 4 points that dlimite a polygon are in clockwise order. 
+ """ + + # if len(points) != 8: + # raise Exception("Points list not valid." + str(len(points))) + + # point = [ + # [int(points[0]) , int(points[1])], + # [int(points[2]) , int(points[3])], + # [int(points[4]) , int(points[5])], + # [int(points[6]) , int(points[7])] + # ] + # edge = [ + # ( point[1][0] - point[0][0])*( point[1][1] + point[0][1]), + # ( point[2][0] - point[1][0])*( point[2][1] + point[1][1]), + # ( point[3][0] - point[2][0])*( point[3][1] + point[2][1]), + # ( point[0][0] - point[3][0])*( point[0][1] + point[3][1]) + # ] + + # summatory = edge[0] + edge[1] + edge[2] + edge[3]; + # if summatory>0: + # raise Exception("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.") + pts = [(points[j], points[j+1]) for j in range(0,len(points),2)] + try: + pdet = Polygon(pts) + except: + assert(0), ('not a valid polygon', pts) + # The polygon should be valid. + if not pdet.is_valid: + assert(0), ('polygon has intersection sides', pts) + pRing = LinearRing(pts) + if pRing.is_ccw: + assert(0), ("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.") + +def get_tl_line_values_from_file_contents(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True): + """ + Returns all points, confindences and transcriptions of a file in lists. Valid line formats: + xmin,ymin,xmax,ymax,[confidence],[transcription] + x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription] + """ + pointsList = [] + transcriptionsList = [] + confidencesList = [] + + lines = content.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != "") : + points, confidence, transcription = get_tl_line_values_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight); + pointsList.append(points) + transcriptionsList.append(transcription) + confidencesList.append(confidence) + + if withConfidence and len(confidencesList)>0 and sort_by_confidences: + import numpy as np + sorted_ind = np.argsort(-np.array(confidencesList)) + confidencesList = [confidencesList[i] for i in sorted_ind] + pointsList = [pointsList[i] for i in sorted_ind] + transcriptionsList = [transcriptionsList[i] for i in sorted_ind] + + return pointsList,confidencesList,transcriptionsList + +def get_tl_line_values_from_file_contents_det(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True): + """ + Returns all points, confindences and transcriptions of a file in lists. 
Valid line formats: + xmin,ymin,xmax,ymax,[confidence],[transcription] + x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription] + """ + pointsList = [] + transcriptionsList = [] + confidencesList = [] + + lines = content.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != "") : + points, confidence, transcription = get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight); + pointsList.append(points) + transcriptionsList.append(transcription) + confidencesList.append(confidence) + + if withConfidence and len(confidencesList)>0 and sort_by_confidences: + import numpy as np + sorted_ind = np.argsort(-np.array(confidencesList)) + confidencesList = [confidencesList[i] for i in sorted_ind] + pointsList = [pointsList[i] for i in sorted_ind] + transcriptionsList = [transcriptionsList[i] for i in sorted_ind] + + return pointsList,confidencesList,transcriptionsList + +def main_evaluation(p,det_file, gt_file, default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True): + """ + This process validates a method, evaluates it and if it succed generates a ZIP file with a JSON entry for each sample. + Params: + p: Dictionary of parmeters with the GT/submission locations. If None is passed, the parameters send by the system are used. + default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + evaluate_method_fn: points to a function that evaluated the submission and return a Dictionary with the results + """ + + # if (p == None): + # p = dict([s[1:].split('=') for s in sys.argv[1:]]) + # if(len(sys.argv)<3): + # print_help() + p = {} + p['g'] =gt_file #'tttgt.zip' + p['s'] =det_file #'det.zip' + + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'} + # try: + validate_data_fn(p['g'], p['s'], evalParams) + evalData = evaluate_method_fn(p['g'], p['s'], evalParams) + resDict.update(evalData) + + # except Exception as e: + # resDict['Message']= str(e) + # resDict['calculated']=False + + if 'o' in p: + if not os.path.exists(p['o']): + os.makedirs(p['o']) + + resultsOutputname = p['o'] + '/results.zip' + outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True) + + del resDict['per_sample'] + if 'output_items' in resDict.keys(): + del resDict['output_items'] + + outZip.writestr('method.json',json.dumps(resDict)) + + if not resDict['calculated']: + if show_result: + sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n') + if 'o' in p: + outZip.close() + return resDict + + if 'o' in p: + if per_sample == True: + for k,v in evalData['per_sample'].items(): + outZip.writestr( k + '.json',json.dumps(v)) + + if 'output_items' in evalData.keys(): + for k, v in evalData['output_items'].items(): + outZip.writestr( k,v) + + outZip.close() + + if show_result: + sys.stdout.write("Calculated!") + sys.stdout.write('\n') + sys.stdout.write(json.dumps(resDict['e2e_method'])) + sys.stdout.write('\n') + sys.stdout.write(json.dumps(resDict['det_only_method'])) + sys.stdout.write('\n') + + return resDict + + +def main_validation(default_evaluation_params_fn,validate_data_fn): + """ + This process validates a method + Params: + default_evaluation_params_fn: points 
to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + """ + try: + p = dict([s[1:].split('=') for s in sys.argv[1:]]) + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + validate_data_fn(p['g'], p['s'], evalParams) + print('SUCCESS') + sys.exit(0) + except Exception as e: + print(str(e)) + sys.exit(101) diff --git a/src/sts/detectron2/evaluation/rrc_evaluation_funcs_ic15.py b/src/sts/detectron2/evaluation/rrc_evaluation_funcs_ic15.py new file mode 100644 index 0000000000000000000000000000000000000000..ba6c70f3605eaaaffad624db0e1099fcfada8179 --- /dev/null +++ b/src/sts/detectron2/evaluation/rrc_evaluation_funcs_ic15.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python2 +#encoding: UTF-8 +import json +import sys;sys.path.append('./') +import zipfile +import re +import sys +import os +import codecs +import importlib +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +def print_help(): + sys.stdout.write('Usage: python %s.py -g= -s= [-o= -p=]' %sys.argv[0]) + sys.exit(2) + + +def load_zip_file_keys(file,fileNameRegExp=''): + """ + Returns an array with the entries of the ZIP file that match with the regular expression. + The key's are the names or the file or the capturing group definied in the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive.') + + pairs = [] + + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( keyName ) + + return pairs + + +def load_zip_file(file,fileNameRegExp='',allEntries=False): + """ + Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file. 
+ The key's are the names or the file or the capturing group definied in the fileNameRegExp + allEntries validates that all entries in the ZIP file pass the fileNameRegExp + """ + try: + archive=zipfile.ZipFile(file, mode='r', allowZip64=True) + except : + raise Exception('Error loading the ZIP archive') + + pairs = [] + for name in archive.namelist(): + addFile = True + keyName = name + if fileNameRegExp!="": + m = re.match(fileNameRegExp,name) + if m == None: + addFile = False + else: + if len(m.groups())>0: + keyName = m.group(1) + + if addFile: + pairs.append( [ keyName , archive.read(name)] ) + else: + if allEntries: + raise Exception('ZIP entry not valid: %s' %name) + + return dict(pairs) + +def decode_utf8(raw): + """ + Returns a Unicode object on success, or None on failure + """ + try: + raw = codecs.decode(raw,'utf-8', 'replace') + #extracts BOM if exists + raw = raw.encode('utf8') + if raw.startswith(codecs.BOM_UTF8): + raw = raw.replace(codecs.BOM_UTF8, '', 1) + return raw.decode('utf-8') + except: + return None + +def validate_lines_in_file(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + This function validates that all lines of the file calling the Line validation function for each line + """ + utf8File = decode_utf8(file_contents) + if (utf8File is None) : + raise Exception("The file %s is not UTF-8" %fileName) + + lines = utf8File.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != ""): + try: + validate_tl_line(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + except Exception as e: + raise Exception(("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))).encode('utf-8', 'replace')) + + + +def validate_tl_line(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + """ + get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight) + + +def get_tl_line_values(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0): + """ + Validate the format of the line. If the line is not valid an exception will be raised. + If maxWidth and maxHeight are specified, all points must be inside the imgage bounds. + Posible values are: + LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription] + LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription] + Returns values from a textline. Points , [Confidences], [Transcriptions] + """ + confidence = 0.0 + transcription = ""; + points = [] + + numPoints = 4; + + if LTRB: + + numPoints = 4; + + if withTranscription and withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line) + if m == None : + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line) + raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence,transcription") + elif withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',line) + if m == None : + raise Exception("Format incorrect. 
Should be: xmin,ymin,xmax,ymax,confidence") + elif withTranscription: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,(.*)$',line) + if m == None : + raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,transcription") + else: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,?\s*$',line) + if m == None : + raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax") + + xmin = int(m.group(1)) + ymin = int(m.group(2)) + xmax = int(m.group(3)) + ymax = int(m.group(4)) + if(xmax0 and imHeight>0): + validate_point_inside_bounds(xmin,ymin,imWidth,imHeight); + validate_point_inside_bounds(xmax,ymax,imWidth,imHeight); + + else: + numPoints = 8; + + if withTranscription and withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line) + if m == None : + raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence,transcription") + elif withConfidence: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',line) + if m == None : + raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence") + elif withTranscription: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,(.*)$',line) + if m == None : + raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,transcription") + else: + m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*$',line) + if m == None : + raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4") + + points = [ float(m.group(i)) for i in range(1, (numPoints+1) ) ] + validate_clockwise_points(points) + + if (imWidth>0 and imHeight>0): + validate_point_inside_bounds(points[0],points[1],imWidth,imHeight); + validate_point_inside_bounds(points[2],points[3],imWidth,imHeight); + validate_point_inside_bounds(points[4],points[5],imWidth,imHeight); + validate_point_inside_bounds(points[6],points[7],imWidth,imHeight); + + + if withConfidence: + try: + confidence = float(m.group(numPoints+1)) + except ValueError: + raise Exception("Confidence value must be a float") + + if withTranscription: + posTranscription = numPoints + (2 if withConfidence else 1) + transcription = m.group(posTranscription) + m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription) + if m2 != None : #Transcription with double quotes, we extract the value and replace escaped characters + transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"") + + return points,confidence,transcription + + +def validate_point_inside_bounds(x,y,imWidth,imHeight): + if(x<0 or x>imWidth): + raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %(xmin,imWidth,imHeight)) + if(y<0 or y>imHeight): + raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s) Sample: %s Line:%s" %(ymin,imWidth,imHeight)) + +def validate_clockwise_points(points): + """ + Validates that the points that the 4 points that dlimite a polygon are in clockwise order. + """ + + if len(points) != 8: + raise Exception("Points list not valid." 
+ str(len(points))) + + point = [ + [int(points[0]) , int(points[1])], + [int(points[2]) , int(points[3])], + [int(points[4]) , int(points[5])], + [int(points[6]) , int(points[7])] + ] + edge = [ + ( point[1][0] - point[0][0])*( point[1][1] + point[0][1]), + ( point[2][0] - point[1][0])*( point[2][1] + point[1][1]), + ( point[3][0] - point[2][0])*( point[3][1] + point[2][1]), + ( point[0][0] - point[3][0])*( point[0][1] + point[3][1]) + ] + + summatory = edge[0] + edge[1] + edge[2] + edge[3]; + if summatory>0: + raise Exception("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.") + +def get_tl_line_values_from_file_contents(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True): + """ + Returns all points, confindences and transcriptions of a file in lists. Valid line formats: + xmin,ymin,xmax,ymax,[confidence],[transcription] + x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription] + """ + pointsList = [] + transcriptionsList = [] + confidencesList = [] + + lines = content.split( "\r\n" if CRLF else "\n" ) + for line in lines: + line = line.replace("\r","").replace("\n","") + if(line != "") : + points, confidence, transcription = get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight); + pointsList.append(points) + transcriptionsList.append(transcription) + confidencesList.append(confidence) + + if withConfidence and len(confidencesList)>0 and sort_by_confidences: + import numpy as np + sorted_ind = np.argsort(-np.array(confidencesList)) + confidencesList = [confidencesList[i] for i in sorted_ind] + pointsList = [pointsList[i] for i in sorted_ind] + transcriptionsList = [transcriptionsList[i] for i in sorted_ind] + + return pointsList,confidencesList,transcriptionsList + +def main_evaluation(p,default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True): + """ + This process validates a method, evaluates it and if it succed generates a ZIP file with a JSON entry for each sample. + Params: + p: Dictionary of parmeters with the GT/submission locations. If None is passed, the parameters send by the system are used. 
+ default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + evaluate_method_fn: points to a function that evaluated the submission and return a Dictionary with the results + """ + + if (p == None): + p = dict([s[1:].split('=') for s in sys.argv[1:]]) + if(len(sys.argv)<3): + print_help() + + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'} + try: + validate_data_fn(p['g'], p['s'], evalParams) + evalData = evaluate_method_fn(p['g'], p['s'], evalParams) + resDict.update(evalData) + + except Exception as e: + resDict['Message']= str(e) + resDict['calculated']=False + + if 'o' in p: + if not os.path.exists(p['o']): + os.makedirs(p['o']) + + resultsOutputname = p['o'] + '/results.zip' + outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True) + + del resDict['per_sample'] + if 'output_items' in resDict.keys(): + del resDict['output_items'] + + outZip.writestr('method.json',json.dumps(resDict)) + + if not resDict['calculated']: + if show_result: + sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n') + if 'o' in p: + outZip.close() + return resDict + + if 'o' in p: + if per_sample == True: + for k,v in evalData['per_sample'].items(): + outZip.writestr( k + '.json',json.dumps(v)) + + if 'output_items' in evalData.keys(): + for k, v in evalData['output_items'].items(): + outZip.writestr( k,v) + + outZip.close() + + if show_result: + sys.stdout.write("Calculated!") + sys.stdout.write(json.dumps(resDict['method'])) + + return resDict + + +def main_validation(default_evaluation_params_fn,validate_data_fn): + """ + This process validates a method + Params: + default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation + validate_data_fn: points to a method that validates the corrct format of the submission + """ + try: + p = dict([s[1:].split('=') for s in sys.argv[1:]]) + evalParams = default_evaluation_params_fn() + if 'p' in p.keys(): + evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) ) + + validate_data_fn(p['g'], p['s'], evalParams) + print('SUCCESS') + sys.exit(0) + except Exception as e: + print(str(e)) + sys.exit(101) diff --git a/src/sts/detectron2/evaluation/sem_seg_evaluation.py b/src/sts/detectron2/evaluation/sem_seg_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..7a19db71562ef47569dc7f77ec616af85447f0ec --- /dev/null +++ b/src/sts/detectron2/evaluation/sem_seg_evaluation.py @@ -0,0 +1,184 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import json +import logging +import numpy as np +import os +from collections import OrderedDict +import PIL.Image as Image +import pycocotools.mask as mask_util +import torch + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.comm import all_gather, is_main_process, synchronize +from detectron2.utils.file_io import PathManager + +from .evaluator import DatasetEvaluator + + +class SemSegEvaluator(DatasetEvaluator): + """ + Evaluate semantic segmentation metrics. 
+ """ + + def __init__( + self, + dataset_name, + distributed=True, + output_dir=None, + *, + num_classes=None, + ignore_label=None, + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + distributed (bool): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + output_dir (str): an output directory to dump results. + num_classes, ignore_label: deprecated argument + """ + self._logger = logging.getLogger(__name__) + if num_classes is not None: + self._logger.warn( + "SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata." + ) + if ignore_label is not None: + self._logger.warn( + "SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata." + ) + self._dataset_name = dataset_name + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + + self.input_file_to_gt_file = { + dataset_record["file_name"]: dataset_record["sem_seg_file_name"] + for dataset_record in DatasetCatalog.get(dataset_name) + } + + meta = MetadataCatalog.get(dataset_name) + # Dict that maps contiguous training ids to COCO category ids + try: + c2d = meta.stuff_dataset_id_to_contiguous_id + self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} + except AttributeError: + self._contiguous_id_to_dataset_id = None + self._class_names = meta.stuff_classes + self._num_classes = len(meta.stuff_classes) + if num_classes is not None: + assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}" + self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label + + def reset(self): + self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64) + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a model. + It is a list of dicts. Each dict corresponds to an image and + contains keys like "height", "width", "file_name". + outputs: the outputs of a model. It is either list of semantic segmentation predictions + (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic + segmentation prediction in the same format. 
+ """ + for input, output in zip(inputs, outputs): + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) + pred = np.array(output, dtype=np.int) + with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f: + gt = np.array(Image.open(f), dtype=np.int) + + gt[gt == self._ignore_label] = self._num_classes + + self._conf_matrix += np.bincount( + (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1), + minlength=self._conf_matrix.size, + ).reshape(self._conf_matrix.shape) + + self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) + + def evaluate(self): + """ + Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): + + * Mean intersection-over-union averaged across classes (mIoU) + * Frequency Weighted IoU (fwIoU) + * Mean pixel accuracy averaged across classes (mACC) + * Pixel Accuracy (pACC) + """ + if self._distributed: + synchronize() + conf_matrix_list = all_gather(self._conf_matrix) + self._predictions = all_gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not is_main_process(): + return + + self._conf_matrix = np.zeros_like(self._conf_matrix) + for conf_matrix in conf_matrix_list: + self._conf_matrix += conf_matrix + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(self._predictions)) + + acc = np.full(self._num_classes, np.nan, dtype=np.float) + iou = np.full(self._num_classes, np.nan, dtype=np.float) + tp = self._conf_matrix.diagonal()[:-1].astype(np.float) + pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) + class_weights = pos_gt / np.sum(pos_gt) + pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) + acc_valid = pos_gt > 0 + acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] + iou_valid = (pos_gt + pos_pred) > 0 + union = pos_gt + pos_pred - tp + iou[acc_valid] = tp[acc_valid] / union[acc_valid] + macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) + miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) + fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) + pacc = np.sum(tp) / np.sum(pos_gt) + + res = {} + res["mIoU"] = 100 * miou + res["fwIoU"] = 100 * fiou + for i, name in enumerate(self._class_names): + res["IoU-{}".format(name)] = 100 * iou[i] + res["mACC"] = 100 * macc + res["pACC"] = 100 * pacc + for i, name in enumerate(self._class_names): + res["ACC-{}".format(name)] = 100 * acc[i] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(res, f) + results = OrderedDict({"sem_seg": res}) + self._logger.info(results) + return results + + def encode_json_sem_seg(self, sem_seg, input_file_name): + """ + Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. 
+ See http://cocodataset.org/#format-results + """ + json_list = [] + for label in np.unique(sem_seg): + if self._contiguous_id_to_dataset_id is not None: + assert ( + label in self._contiguous_id_to_dataset_id + ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) + dataset_id = self._contiguous_id_to_dataset_id[label] + else: + dataset_id = int(label) + mask = (sem_seg == label).astype(np.uint8) + mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] + mask_rle["counts"] = mask_rle["counts"].decode("utf-8") + json_list.append( + {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} + ) + return json_list diff --git a/src/sts/detectron2/evaluation/testing.py b/src/sts/detectron2/evaluation/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..04517f51548c085e6a9f56c943a17421ef07a388 --- /dev/null +++ b/src/sts/detectron2/evaluation/testing.py @@ -0,0 +1,83 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +import pprint +import sys +from collections import OrderedDict +from collections.abc import Mapping + + +def print_csv_format(results): + """ + Print main metrics in a format similar to Detectron, + so that they are easy to copypaste into a spreadsheet. + + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + """ + # unordered results cannot be properly printed + assert isinstance(results, OrderedDict) or not len(results), results + logger = logging.getLogger(__name__) + for task, res in results.items(): + # Don't print "AP-category" metrics since they are usually not tracked. + important_res = [(k, v) for k, v in res.items() if "-" not in k] + logger.info("copypaste: Task: {}".format(task)) + logger.info("copypaste: " + ",".join([k[0] for k in important_res])) + logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) + + +def verify_results(cfg, results): + """ + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + + Returns: + bool: whether the verification succeeds or not + """ + expected_results = cfg.TEST.EXPECTED_RESULTS + if not len(expected_results): + return True + + ok = True + for task, metric, expected, tolerance in expected_results: + actual = results[task].get(metric, None) + if actual is None: + ok = False + continue + if not np.isfinite(actual): + ok = False + continue + diff = abs(actual - expected) + if diff > tolerance: + ok = False + + logger = logging.getLogger(__name__) + if not ok: + logger.error("Result verification failed!") + logger.error("Expected Results: " + str(expected_results)) + logger.error("Actual Results: " + pprint.pformat(results)) + + sys.exit(1) + else: + logger.info("Results verification passed.") + return ok + + +def flatten_results_dict(results): + """ + Expand a hierarchical dict of scalars into a flat dict of scalars. + If results[k1][k2][k3] = v, the returned dict will have the entry + {"k1/k2/k3": v}. 
+ + Args: + results (dict): + """ + r = {} + for k, v in results.items(): + if isinstance(v, Mapping): + v = flatten_results_dict(v) + for kk, vv in v.items(): + r[k + "/" + kk] = vv + else: + r[k] = v + return r diff --git a/src/sts/detectron2/evaluation/text_eval_script.py b/src/sts/detectron2/evaluation/text_eval_script.py new file mode 100644 index 0000000000000000000000000000000000000000..37758770740bb3ddcad6520f6d5887669f9d8183 --- /dev/null +++ b/src/sts/detectron2/evaluation/text_eval_script.py @@ -0,0 +1,472 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# encoding=utf8 +from collections import namedtuple +from detectron2.evaluation import rrc_evaluation_funcs +import importlib +import sys + +import math + +from rapidfuzz import string_metric + +WORD_SPOTTING =True +def evaluation_imports(): + """ + evaluation_imports: Dictionary ( key = module name , value = alias ) with python modules used in the evaluation. + """ + return { + 'Polygon':'plg', + 'numpy':'np' + } + +def default_evaluation_params(): + """ + default_evaluation_params: Default parameters to use for the validation and evaluation. + """ + global WORD_SPOTTING + return { + 'IOU_CONSTRAINT' :0.5, + 'AREA_PRECISION_CONSTRAINT' :0.5, + 'WORD_SPOTTING' :WORD_SPOTTING, + 'MIN_LENGTH_CARE_WORD' :3, + 'GT_SAMPLE_NAME_2_ID':'([0-9]+).txt', + 'DET_SAMPLE_NAME_2_ID':'([0-9]+).txt', + 'LTRB':False, #LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) + 'CRLF':False, # Lines are delimited by Windows CRLF format + 'CONFIDENCES':False, #Detections must include confidence value. MAP and MAR will be calculated, + 'SPECIAL_CHARACTERS':str('!?.:,*"()·[]/\''), + 'ONLY_REMOVE_FIRST_LAST_CHARACTER' : True + } + +def validate_data(gtFilePath, submFilePath, evaluationParams): + """ + Method validate_data: validates that all files in the results folder are correct (have the correct name contents). + Validates also that there are no missing files in the folder. + If some error detected, the method raises the error + """ + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) + + subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) + + #Validate format of GroundTruth + for k in gt: + rrc_evaluation_funcs.validate_lines_in_file_gt(k,gt[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True) + + #Validate format of results + for k in subm: + if (k in gt) == False : + raise Exception("The sample %s not present in GT" %k) + + rrc_evaluation_funcs.validate_lines_in_file(k,subm[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + +def evaluate_method(gtFilePath, submFilePath, evaluationParams): + """ + Method evaluate_method: evaluate method and returns the results + Results. Dictionary with the following values: + - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } + - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } + """ + for module,alias in evaluation_imports().items(): + globals()[alias] = importlib.import_module(module) + + def polygon_from_points(points): + """ + Returns a Polygon object to use with the Polygon2 class from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4 + """ + num_points = len(points) + # resBoxes=np.empty([1,num_points],dtype='int32') + resBoxes=np.empty([1,num_points],dtype='float32') + for inp in range(0, num_points, 2): + resBoxes[0, int(inp/2)] = float(points[int(inp)]) + resBoxes[0, int(inp/2+num_points/2)] = float(points[int(inp+1)]) + pointMat = resBoxes[0].reshape([2,int(num_points/2)]).T + return plg.Polygon(pointMat) + + def rectangle_to_polygon(rect): + resBoxes=np.empty([1,8],dtype='int32') + resBoxes[0,0]=int(rect.xmin) + resBoxes[0,4]=int(rect.ymax) + resBoxes[0,1]=int(rect.xmin) + resBoxes[0,5]=int(rect.ymin) + resBoxes[0,2]=int(rect.xmax) + resBoxes[0,6]=int(rect.ymin) + resBoxes[0,3]=int(rect.xmax) + resBoxes[0,7]=int(rect.ymax) + + pointMat = resBoxes[0].reshape([2,4]).T + + return plg.Polygon( pointMat) + + def rectangle_to_points(rect): + points = [int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax), int(rect.xmax), int(rect.ymin), int(rect.xmin), int(rect.ymin)] + return points + + def get_union(pD,pG): + areaA = pD.area(); + areaB = pG.area(); + return areaA + areaB - get_intersection(pD, pG); + + def get_intersection_over_union(pD,pG): + try: + return get_intersection(pD, pG) / get_union(pD, pG); + except: + return 0 + + def get_intersection(pD,pG): + pInt = pD & pG + if len(pInt) == 0: + return 0 + return pInt.area() + + def compute_ap(confList, matchList,numGtCare): + correct = 0 + AP = 0 + if len(confList)>0: + confList = np.array(confList) + matchList = np.array(matchList) + sorted_ind = np.argsort(-confList) + confList = confList[sorted_ind] + matchList = matchList[sorted_ind] + for n in range(len(confList)): + match = matchList[n] + if match: + correct += 1 + AP += float(correct)/(n + 1) + + if numGtCare>0: + AP /= numGtCare + + return AP + + def transcription_match(transGt,transDet,specialCharacters=str(r'!?.:,*"()·[]/\''),onlyRemoveFirstLastCharacterGT=True): + + if onlyRemoveFirstLastCharacterGT: + #special characters in GT are allowed only at initial or final position + if (transGt==transDet): + return True + + if specialCharacters.find(transGt[0])>-1: + if transGt[1:]==transDet: + return True + + if specialCharacters.find(transGt[-1])>-1: + if transGt[0:len(transGt)-1]==transDet: + return True + + if specialCharacters.find(transGt[0])>-1 and specialCharacters.find(transGt[-1])>-1: + if transGt[1:len(transGt)-1]==transDet: + return True + return False + else: + #Special characters are removed from the begining and the end of both Detection and GroundTruth + while len(transGt)>0 and specialCharacters.find(transGt[0])>-1: + transGt = transGt[1:] + + while len(transDet)>0 and specialCharacters.find(transDet[0])>-1: + transDet = transDet[1:] + + while len(transGt)>0 and specialCharacters.find(transGt[-1])>-1 : + transGt = transGt[0:len(transGt)-1] + + while len(transDet)>0 and specialCharacters.find(transDet[-1])>-1: + transDet = transDet[0:len(transDet)-1] + + return transGt == transDet + + + def include_in_dictionary(transcription): + """ + Function used in Word Spotting that finds if the Ground Truth transcription meets the rules to enter into the dictionary. 
If not, the transcription will be cared as don't care + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = str("'!?.:,*\"()·[]/"); + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + if len(transcription) != len(transcription.replace(" ","")) : + return False; + + if len(transcription) < evaluationParams['MIN_LENGTH_CARE_WORD']: + return False; + + notAllowed = str("×÷·"); + + range1 = [ ord(u'a'), ord(u'z') ] + range2 = [ ord(u'A'), ord(u'Z') ] + range3 = [ ord(u'À'), ord(u'ƿ') ] + range4 = [ ord(u'DŽ'), ord(u'ɿ') ] + range5 = [ ord(u'Ά'), ord(u'Ͽ') ] + range6 = [ ord(u'-'), ord(u'-') ] + + for char in transcription : + charCode = ord(char) + if(notAllowed.find(char) != -1): + return False + + valid = ( charCode>=range1[0] and charCode<=range1[1] ) or ( charCode>=range2[0] and charCode<=range2[1] ) or ( charCode>=range3[0] and charCode<=range3[1] ) or ( charCode>=range4[0] and charCode<=range4[1] ) or ( charCode>=range5[0] and charCode<=range5[1] ) or ( charCode>=range6[0] and charCode<=range6[1] ) + if valid == False: + return False + + return True + + def include_in_dictionary_transcription(transcription): + """ + Function applied to the Ground Truth transcriptions used in Word Spotting. It removes special characters or terminations + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = str("'!?.:,*\"()·[]/"); + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + return transcription + + perSampleMetrics = {} + + matchedSum = 0 + det_only_matchedSum = 0 + + Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') + + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath,evaluationParams['GT_SAMPLE_NAME_2_ID']) + subm = rrc_evaluation_funcs.load_zip_file(submFilePath,evaluationParams['DET_SAMPLE_NAME_2_ID'],True) + + numGlobalCareGt = 0; + numGlobalCareDet = 0; + det_only_numGlobalCareGt = 0; + det_only_numGlobalCareDet = 0; + + arrGlobalConfidences = []; + arrGlobalMatches = []; + + for resFile in gt: + # print('resgt', resFile) + gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) + if (gtFile is None) : + raise Exception("The file %s is not UTF-8" %resFile) + + recall = 0 + precision = 0 + hmean = 0 + detCorrect = 0 + detOnlyCorrect = 0 + iouMat = np.empty([1,1]) + gtPols = [] + detPols = [] + gtTrans = [] + detTrans = [] + gtPolPoints = [] + detPolPoints = [] + gtDontCarePolsNum = [] #Array of Ground Truth Polygons' keys marked as don't Care + det_only_gtDontCarePolsNum = [] + detDontCarePolsNum = [] #Array of Detected Polygons' matched with a don't Care GT + det_only_detDontCarePolsNum = [] + detMatchedNums = [] + pairs = [] + + arrSampleConfidences = []; + arrSampleMatch = []; + sampleAP = 0; + + pointsList,_,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,False) + + for n in range(len(pointsList)): + points = pointsList[n] + transcription = 
transcriptionsList[n] + det_only_dontCare = dontCare = transcription == "###" # ctw1500 and total_text gt have been modified to the same format. + if evaluationParams['LTRB']: + gtRect = Rectangle(*points) + gtPol = rectangle_to_polygon(gtRect) + else: + gtPol = polygon_from_points(points) + gtPols.append(gtPol) + gtPolPoints.append(points) + + #On word spotting we will filter some transcriptions with special characters + if evaluationParams['WORD_SPOTTING'] : + if dontCare == False : + if include_in_dictionary(transcription) == False : + dontCare = True + else: + transcription = include_in_dictionary_transcription(transcription) + + gtTrans.append(transcription) + if dontCare: + gtDontCarePolsNum.append( len(gtPols)-1 ) + if det_only_dontCare: + det_only_gtDontCarePolsNum.append( len(gtPols)-1 ) + + + if resFile in subm: + + detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) + + pointsList,confidencesList,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents_det(detFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + for n in range(len(pointsList)): + points = pointsList[n] + transcription = transcriptionsList[n] + + if evaluationParams['LTRB']: + detRect = Rectangle(*points) + detPol = rectangle_to_polygon(detRect) + else: + detPol = polygon_from_points(points) + detPols.append(detPol) + detPolPoints.append(points) + detTrans.append(transcription) + + if len(gtDontCarePolsNum)>0 : + for dontCarePol in gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + detDontCarePolsNum.append( len(detPols)-1 ) + break + + if len(det_only_gtDontCarePolsNum)>0 : + for dontCarePol in det_only_gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + det_only_detDontCarePolsNum.append( len(detPols)-1 ) + break + + + if len(gtPols)>0 and len(detPols)>0: + #Calculate IoU and precision matrixs + outputShape=[len(gtPols),len(detPols)] + iouMat = np.empty(outputShape) + gtRectMat = np.zeros(len(gtPols),np.int8) + detRectMat = np.zeros(len(detPols),np.int8) + det_only_gtRectMat = np.zeros(len(gtPols),np.int8) + det_only_detRectMat = np.zeros(len(detPols),np.int8) + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + pG = gtPols[gtNum] + pD = detPols[detNum] + iouMat[gtNum,detNum] = get_intersection_over_union(pD,pG) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum : + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + gtRectMat[gtNum] = 1 + detRectMat[detNum] = 1 + #detection matched only if transcription is equal + # det_only_correct = True + # detOnlyCorrect += 1 + if evaluationParams['WORD_SPOTTING']: + edd = string_metric.levenshtein(gtTrans[gtNum].upper(), detTrans[detNum].upper()) + if edd<=0: + correct = True + else: + correct = False + # correct = gtTrans[gtNum].upper() == detTrans[detNum].upper() + else: + try: + correct = 
transcription_match(gtTrans[gtNum].upper(),detTrans[detNum].upper(),evaluationParams['SPECIAL_CHARACTERS'],evaluationParams['ONLY_REMOVE_FIRST_LAST_CHARACTER'])==True + except: # empty + correct = False + detCorrect += (1 if correct else 0) + if correct: + detMatchedNums.append(detNum) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if det_only_gtRectMat[gtNum] == 0 and det_only_detRectMat[detNum] == 0 and gtNum not in det_only_gtDontCarePolsNum and detNum not in det_only_detDontCarePolsNum: + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + det_only_gtRectMat[gtNum] = 1 + det_only_detRectMat[detNum] = 1 + #detection matched only if transcription is equal + det_only_correct = True + detOnlyCorrect += 1 + + + numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) + numDetCare = (len(detPols) - len(detDontCarePolsNum)) + det_only_numGtCare = (len(gtPols) - len(det_only_gtDontCarePolsNum)) + det_only_numDetCare = (len(detPols) - len(det_only_detDontCarePolsNum)) + if numGtCare == 0: + recall = float(1) + precision = float(0) if numDetCare >0 else float(1) + else: + recall = float(detCorrect) / numGtCare + precision = 0 if numDetCare==0 else float(detCorrect) / numDetCare + + if det_only_numGtCare == 0: + det_only_recall = float(1) + det_only_precision = float(0) if det_only_numDetCare >0 else float(1) + else: + det_only_recall = float(detOnlyCorrect) / det_only_numGtCare + det_only_precision = 0 if det_only_numDetCare==0 else float(detOnlyCorrect) / det_only_numDetCare + + + hmean = 0 if (precision + recall)==0 else 2.0 * precision * recall / (precision + recall) + det_only_hmean = 0 if (det_only_precision + det_only_recall)==0 else 2.0 * det_only_precision * det_only_recall / (det_only_precision + det_only_recall) + + matchedSum += detCorrect + det_only_matchedSum += detOnlyCorrect + numGlobalCareGt += numGtCare + numGlobalCareDet += numDetCare + det_only_numGlobalCareGt += det_only_numGtCare + det_only_numGlobalCareDet += det_only_numDetCare + + perSampleMetrics[resFile] = { + 'precision':precision, + 'recall':recall, + 'hmean':hmean, + 'iouMat':[] if len(detPols)>100 else iouMat.tolist(), + 'gtPolPoints':gtPolPoints, + 'detPolPoints':detPolPoints, + 'gtTrans':gtTrans, + 'detTrans':detTrans, + 'gtDontCare':gtDontCarePolsNum, + 'detDontCare':detDontCarePolsNum, + 'evaluationParams': evaluationParams, + } + + + methodRecall = 0 if numGlobalCareGt == 0 else float(matchedSum)/numGlobalCareGt + methodPrecision = 0 if numGlobalCareDet == 0 else float(matchedSum)/numGlobalCareDet + methodHmean = 0 if methodRecall + methodPrecision==0 else 2* methodRecall * methodPrecision / (methodRecall + methodPrecision) + + det_only_methodRecall = 0 if det_only_numGlobalCareGt == 0 else float(det_only_matchedSum)/det_only_numGlobalCareGt + det_only_methodPrecision = 0 if det_only_numGlobalCareDet == 0 else float(det_only_matchedSum)/det_only_numGlobalCareDet + det_only_methodHmean = 0 if det_only_methodRecall + det_only_methodPrecision==0 else 2* det_only_methodRecall * det_only_methodPrecision / (det_only_methodRecall + det_only_methodPrecision) + + + methodMetrics = r"E2E_RESULTS: precision: {}, recall: {}, hmean: {}".format(methodPrecision, methodRecall, methodHmean) + det_only_methodMetrics = r"DETECTION_ONLY_RESULTS: precision: {}, recall: {}, hmean: {}".format(det_only_methodPrecision, det_only_methodRecall, det_only_methodHmean) + + + resDict = {'calculated':True,'Message':'','e2e_method': methodMetrics,'det_only_method': det_only_methodMetrics,'per_sample': 
perSampleMetrics} + + + return resDict; + +def text_eval_main(det_file, gt_file, is_word_spotting): + global WORD_SPOTTING + WORD_SPOTTING = is_word_spotting + return rrc_evaluation_funcs.main_evaluation(None,det_file, gt_file, default_evaluation_params,validate_data,evaluate_method) diff --git a/src/sts/detectron2/evaluation/text_eval_script_ic15.py b/src/sts/detectron2/evaluation/text_eval_script_ic15.py new file mode 100644 index 0000000000000000000000000000000000000000..a99bdd063a46fb58c55b6cc3022caa2b992bac42 --- /dev/null +++ b/src/sts/detectron2/evaluation/text_eval_script_ic15.py @@ -0,0 +1,501 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# encoding=utf8 +from collections import namedtuple +from detectron2.evaluation import rrc_evaluation_funcs_ic15 as rrc_evaluation_funcs +import importlib +import sys + +import math + +from rapidfuzz import string_metric + +WORD_SPOTTING =True +def evaluation_imports(): + """ + evaluation_imports: Dictionary ( key = module name , value = alias ) with python modules used in the evaluation. + """ + return { + 'Polygon':'plg', + 'numpy':'np' + } + +def default_evaluation_params(): + """ + default_evaluation_params: Default parameters to use for the validation and evaluation. + """ + global WORD_SPOTTING + return { + 'IOU_CONSTRAINT' :0.5, + 'AREA_PRECISION_CONSTRAINT' :0.5, + 'WORD_SPOTTING' :WORD_SPOTTING, + 'MIN_LENGTH_CARE_WORD' :3, + 'GT_SAMPLE_NAME_2_ID':'gt_img_([0-9]+).txt', + 'DET_SAMPLE_NAME_2_ID':'res_img_([0-9]+).txt', + 'LTRB':False, #LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) + 'CRLF':False, # Lines are delimited by Windows CRLF format + 'CONFIDENCES':False, #Detections must include confidence value. MAP and MAR will be calculated, + 'SPECIAL_CHARACTERS':'!?.:,*"()·[]/\'', + 'ONLY_REMOVE_FIRST_LAST_CHARACTER' : True + } + +def validate_data(gtFilePath, submFilePath, evaluationParams): + """ + Method validate_data: validates that all files in the results folder are correct (have the correct name contents). + Validates also that there are no missing files in the folder. + If some error detected, the method raises the error + """ + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) + subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) + #Validate format of GroundTruth + for k in gt: + rrc_evaluation_funcs.validate_lines_in_file(k,gt[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True) + + #Validate format of results + for k in subm: + if (k in gt) == False : + raise Exception("The sample %s not present in GT" %k) + + rrc_evaluation_funcs.validate_lines_in_file(k,subm[k],evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + +def evaluate_method(gtFilePath, submFilePath, evaluationParams): + """ + Method evaluate_method: evaluate method and returns the results + Results. Dictionary with the following values: + - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } + - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } + """ + for module,alias in evaluation_imports().items(): + globals()[alias] = importlib.import_module(module) + + def polygon_from_points(points,correctOffset=False): + """ + Returns a Polygon object to use with the Polygon2 class from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4 + """ + + if correctOffset: #this will substract 1 from the coordinates that correspond to the xmax and ymax + points[2] -= 1 + points[4] -= 1 + points[5] -= 1 + points[7] -= 1 + + resBoxes=np.empty([1,8],dtype='int32') + resBoxes[0,0]=int(points[0]) + resBoxes[0,4]=int(points[1]) + resBoxes[0,1]=int(points[2]) + resBoxes[0,5]=int(points[3]) + resBoxes[0,2]=int(points[4]) + resBoxes[0,6]=int(points[5]) + resBoxes[0,3]=int(points[6]) + resBoxes[0,7]=int(points[7]) + pointMat = resBoxes[0].reshape([2,4]).T + return plg.Polygon( pointMat) + + def rectangle_to_polygon(rect): + resBoxes=np.empty([1,8],dtype='int32') + resBoxes[0,0]=int(rect.xmin) + resBoxes[0,4]=int(rect.ymax) + resBoxes[0,1]=int(rect.xmin) + resBoxes[0,5]=int(rect.ymin) + resBoxes[0,2]=int(rect.xmax) + resBoxes[0,6]=int(rect.ymin) + resBoxes[0,3]=int(rect.xmax) + resBoxes[0,7]=int(rect.ymax) + + pointMat = resBoxes[0].reshape([2,4]).T + + return plg.Polygon( pointMat) + + def rectangle_to_points(rect): + points = [int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax), int(rect.xmax), int(rect.ymin), int(rect.xmin), int(rect.ymin)] + return points + + def get_union(pD,pG): + areaA = pD.area(); + areaB = pG.area(); + return areaA + areaB - get_intersection(pD, pG); + + def get_intersection_over_union(pD,pG): + try: + return get_intersection(pD, pG) / get_union(pD, pG); + except: + return 0 + + def get_intersection(pD,pG): + pInt = pD & pG + if len(pInt) == 0: + return 0 + return pInt.area() + + def compute_ap(confList, matchList,numGtCare): + correct = 0 + AP = 0 + if len(confList)>0: + confList = np.array(confList) + matchList = np.array(matchList) + sorted_ind = np.argsort(-confList) + confList = confList[sorted_ind] + matchList = matchList[sorted_ind] + for n in range(len(confList)): + match = matchList[n] + if match: + correct += 1 + AP += float(correct)/(n + 1) + + if numGtCare>0: + AP /= numGtCare + + return AP + + def transcription_match(transGt,transDet,specialCharacters='!?.:,*"()·[]/\'',onlyRemoveFirstLastCharacterGT=True): + + if onlyRemoveFirstLastCharacterGT: + #special characters in GT are allowed only at initial or final position + if (transGt==transDet): + return True + + if specialCharacters.find(transGt[0])>-1: + if transGt[1:]==transDet: + return True + + if specialCharacters.find(transGt[-1])>-1: + if transGt[0:len(transGt)-1]==transDet: + return True + + if specialCharacters.find(transGt[0])>-1 and specialCharacters.find(transGt[-1])>-1: + if transGt[1:len(transGt)-1]==transDet: + return True + return False + else: + #Special characters are removed from the begining and the end of both Detection and GroundTruth + while len(transGt)>0 and specialCharacters.find(transGt[0])>-1: + transGt = transGt[1:] + + while len(transDet)>0 and specialCharacters.find(transDet[0])>-1: + transDet = transDet[1:] + + while len(transGt)>0 and specialCharacters.find(transGt[-1])>-1 : + transGt = transGt[0:len(transGt)-1] + + while len(transDet)>0 and specialCharacters.find(transDet[-1])>-1: + transDet = transDet[0:len(transDet)-1] + + return transGt == transDet + + + def include_in_dictionary(transcription): + """ + Function used in Word Spotting 
that finds if the Ground Truth transcription meets the rules to enter into the dictionary. If not, the transcription will be cared as don't care + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = "'!?.:,*\"()·[]/"; + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + if len(transcription) != len(transcription.replace(" ","")) : + return False; + + if len(transcription) < evaluationParams['MIN_LENGTH_CARE_WORD']: + return False; + + notAllowed = "×÷·"; + + range1 = [ ord(u'a'), ord(u'z') ] + range2 = [ ord(u'A'), ord(u'Z') ] + range3 = [ ord(u'À'), ord(u'ƿ') ] + range4 = [ ord(u'DŽ'), ord(u'ɿ') ] + range5 = [ ord(u'Ά'), ord(u'Ͽ') ] + range6 = [ ord(u'-'), ord(u'-') ] + + for char in transcription : + charCode = ord(char) + if(notAllowed.find(char) != -1): + return False + + valid = ( charCode>=range1[0] and charCode<=range1[1] ) or ( charCode>=range2[0] and charCode<=range2[1] ) or ( charCode>=range3[0] and charCode<=range3[1] ) or ( charCode>=range4[0] and charCode<=range4[1] ) or ( charCode>=range5[0] and charCode<=range5[1] ) or ( charCode>=range6[0] and charCode<=range6[1] ) + if valid == False: + return False + + return True + + def include_in_dictionary_transcription(transcription): + """ + Function applied to the Ground Truth transcriptions used in Word Spotting. It removes special characters or terminations + """ + #special case 's at final + if transcription[len(transcription)-2:]=="'s" or transcription[len(transcription)-2:]=="'S": + transcription = transcription[0:len(transcription)-2] + + #hypens at init or final of the word + transcription = transcription.strip('-'); + + specialCharacters = "'!?.:,*\"()·[]/"; + for character in specialCharacters: + transcription = transcription.replace(character,' ') + + transcription = transcription.strip() + + return transcription + + perSampleMetrics = {} + + matchedSum = 0 + det_only_matchedSum = 0 + + Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') + + gt = rrc_evaluation_funcs.load_zip_file(gtFilePath,evaluationParams['GT_SAMPLE_NAME_2_ID']) + subm = rrc_evaluation_funcs.load_zip_file(submFilePath,evaluationParams['DET_SAMPLE_NAME_2_ID'],True) + + numGlobalCareGt = 0; + numGlobalCareDet = 0; + det_only_numGlobalCareGt = 0; + det_only_numGlobalCareDet = 0; + + arrGlobalConfidences = []; + arrGlobalMatches = []; + + for resFile in gt: + + gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) + if (gtFile is None) : + raise Exception("The file %s is not UTF-8" %resFile) + + recall = 0 + precision = 0 + hmean = 0 + detCorrect = 0 + detOnlyCorrect = 0 + iouMat = np.empty([1,1]) + gtPols = [] + detPols = [] + gtTrans = [] + detTrans = [] + gtPolPoints = [] + detPolPoints = [] + gtDontCarePolsNum = [] #Array of Ground Truth Polygons' keys marked as don't Care + det_only_gtDontCarePolsNum = [] + detDontCarePolsNum = [] #Array of Detected Polygons' matched with a don't Care GT + det_only_detDontCarePolsNum = [] + detMatchedNums = [] + pairs = [] + + arrSampleConfidences = []; + arrSampleMatch = []; + sampleAP = 0; + + evaluationLog = "" + + pointsList,_,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,False) + for n in 
range(len(pointsList)): + points = pointsList[n] + transcription = transcriptionsList[n] + # dontCare = transcription == "###" + det_only_dontCare = dontCare = transcription == "###" # ctw1500 and total_text gt have been modified to the same format. + if evaluationParams['LTRB']: + gtRect = Rectangle(*points) + gtPol = rectangle_to_polygon(gtRect) + else: + gtPol = polygon_from_points(points) + gtPols.append(gtPol) + gtPolPoints.append(points) + + #On word spotting we will filter some transcriptions with special characters + if evaluationParams['WORD_SPOTTING'] : + if dontCare == False : + if include_in_dictionary(transcription) == False : + dontCare = True + else: + transcription = include_in_dictionary_transcription(transcription) + + gtTrans.append(transcription) + if dontCare: + gtDontCarePolsNum.append( len(gtPols)-1 ) + if det_only_dontCare: + det_only_gtDontCarePolsNum.append( len(gtPols)-1 ) + + evaluationLog += "GT polygons: " + str(len(gtPols)) + (" (" + str(len(gtDontCarePolsNum)) + " don't care)\n" if len(gtDontCarePolsNum)>0 else "\n") + + if resFile in subm: + + detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) + + pointsList,confidencesList,transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(detFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True,evaluationParams['CONFIDENCES']) + + for n in range(len(pointsList)): + points = pointsList[n] + transcription = transcriptionsList[n] + + if evaluationParams['LTRB']: + detRect = Rectangle(*points) + detPol = rectangle_to_polygon(detRect) + else: + detPol = polygon_from_points(points) + detPols.append(detPol) + detPolPoints.append(points) + detTrans.append(transcription) + + if len(gtDontCarePolsNum)>0 : + for dontCarePol in gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + detDontCarePolsNum.append( len(detPols)-1 ) + break + + + if len(det_only_gtDontCarePolsNum)>0 : + for dontCarePol in det_only_gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol,detPol) + pdDimensions = detPol.area() + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT'] ): + det_only_detDontCarePolsNum.append( len(detPols)-1 ) + break + + evaluationLog += "DET polygons: " + str(len(detPols)) + (" (" + str(len(detDontCarePolsNum)) + " don't care)\n" if len(detDontCarePolsNum)>0 else "\n") + + if len(gtPols)>0 and len(detPols)>0: + #Calculate IoU and precision matrixs + outputShape=[len(gtPols),len(detPols)] + iouMat = np.empty(outputShape) + gtRectMat = np.zeros(len(gtPols),np.int8) + detRectMat = np.zeros(len(detPols),np.int8) + det_only_gtRectMat = np.zeros(len(gtPols),np.int8) + det_only_detRectMat = np.zeros(len(detPols),np.int8) + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + pG = gtPols[gtNum] + pD = detPols[detNum] + iouMat[gtNum,detNum] = get_intersection_over_union(pD,pG) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum : + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + gtRectMat[gtNum] = 1 + detRectMat[detNum] = 1 + #detection matched only if 
transcription is equal + if evaluationParams['WORD_SPOTTING']: + correct = gtTrans[gtNum].upper() == detTrans[detNum].upper() + else: + correct = transcription_match(gtTrans[gtNum].upper(),detTrans[detNum].upper(),evaluationParams['SPECIAL_CHARACTERS'],evaluationParams['ONLY_REMOVE_FIRST_LAST_CHARACTER'])==True + detCorrect += (1 if correct else 0) + if correct: + detMatchedNums.append(detNum) + pairs.append({'gt':gtNum,'det':detNum,'correct':correct}) + evaluationLog += "Match GT #" + str(gtNum) + " with Det #" + str(detNum) + " trans. correct: " + str(correct) + "\n" + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if det_only_gtRectMat[gtNum] == 0 and det_only_detRectMat[detNum] == 0 and gtNum not in det_only_gtDontCarePolsNum and detNum not in det_only_detDontCarePolsNum: + if iouMat[gtNum,detNum]>evaluationParams['IOU_CONSTRAINT']: + det_only_gtRectMat[gtNum] = 1 + det_only_detRectMat[detNum] = 1 + #detection matched only if transcription is equal + det_only_correct = True + detOnlyCorrect += 1 + + if evaluationParams['CONFIDENCES']: + for detNum in range(len(detPols)): + if detNum not in detDontCarePolsNum : + #we exclude the don't care detections + match = detNum in detMatchedNums + + arrSampleConfidences.append(confidencesList[detNum]) + arrSampleMatch.append(match) + + arrGlobalConfidences.append(confidencesList[detNum]); + arrGlobalMatches.append(match); + + numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) + numDetCare = (len(detPols) - len(detDontCarePolsNum)) + det_only_numGtCare = (len(gtPols) - len(det_only_gtDontCarePolsNum)) + det_only_numDetCare = (len(detPols) - len(det_only_detDontCarePolsNum)) + if numGtCare == 0: + recall = float(1) + precision = float(0) if numDetCare >0 else float(1) + sampleAP = precision + else: + recall = float(detCorrect) / numGtCare + precision = 0 if numDetCare==0 else float(detCorrect) / numDetCare + if evaluationParams['CONFIDENCES']: + sampleAP = compute_ap(arrSampleConfidences, arrSampleMatch, numGtCare ) + + if det_only_numGtCare == 0: + det_only_recall = float(1) + det_only_precision = float(0) if det_only_numDetCare >0 else float(1) + else: + det_only_recall = float(detOnlyCorrect) / det_only_numGtCare + det_only_precision = 0 if det_only_numDetCare==0 else float(detOnlyCorrect) / det_only_numDetCare + + hmean = 0 if (precision + recall)==0 else 2.0 * precision * recall / (precision + recall) + det_only_hmean = 0 if (det_only_precision + det_only_recall)==0 else 2.0 * det_only_precision * det_only_recall / (det_only_precision + det_only_recall) + + matchedSum += detCorrect + det_only_matchedSum += detOnlyCorrect + numGlobalCareGt += numGtCare + numGlobalCareDet += numDetCare + det_only_numGlobalCareGt += det_only_numGtCare + det_only_numGlobalCareDet += det_only_numDetCare + + perSampleMetrics[resFile] = { + 'precision':precision, + 'recall':recall, + 'hmean':hmean, + 'pairs':pairs, + 'AP':sampleAP, + 'iouMat':[] if len(detPols)>100 else iouMat.tolist(), + 'gtPolPoints':gtPolPoints, + 'detPolPoints':detPolPoints, + 'gtTrans':gtTrans, + 'detTrans':detTrans, + 'gtDontCare':gtDontCarePolsNum, + 'detDontCare':detDontCarePolsNum, + 'evaluationParams': evaluationParams, + 'evaluationLog': evaluationLog + } + + # Compute AP + AP = 0 + if evaluationParams['CONFIDENCES']: + AP = compute_ap(arrGlobalConfidences, arrGlobalMatches, numGlobalCareGt) + + methodRecall = 0 if numGlobalCareGt == 0 else float(matchedSum)/numGlobalCareGt + methodPrecision = 0 if numGlobalCareDet == 0 else float(matchedSum)/numGlobalCareDet + 
methodHmean = 0 if methodRecall + methodPrecision==0 else 2* methodRecall * methodPrecision / (methodRecall + methodPrecision) + + det_only_methodRecall = 0 if det_only_numGlobalCareGt == 0 else float(det_only_matchedSum)/det_only_numGlobalCareGt + det_only_methodPrecision = 0 if det_only_numGlobalCareDet == 0 else float(det_only_matchedSum)/det_only_numGlobalCareDet + det_only_methodHmean = 0 if det_only_methodRecall + det_only_methodPrecision==0 else 2* det_only_methodRecall * det_only_methodPrecision / (det_only_methodRecall + det_only_methodPrecision) + + methodMetrics = r"E2E_RESULTS: precision: {}, recall: {}, hmean: {}".format(methodPrecision, methodRecall, methodHmean) + det_only_methodMetrics = r"DETECTION_ONLY_RESULTS: precision: {}, recall: {}, hmean: {}".format(det_only_methodPrecision, det_only_methodRecall, det_only_methodHmean) + + resDict = {'calculated':True,'Message':'','e2e_method': methodMetrics, 'det_only_method': det_only_methodMetrics, 'per_sample': perSampleMetrics} + + + return resDict; + + + +def text_eval_main_ic15(det_file, gt_file, is_word_spotting): + global WORD_SPOTTING + WORD_SPOTTING = is_word_spotting + p = { + 'g': gt_file, + 's': det_file + } + return rrc_evaluation_funcs.main_evaluation(p,default_evaluation_params,validate_data,evaluate_method) \ No newline at end of file diff --git a/src/sts/detectron2/evaluation/text_evaluation.py b/src/sts/detectron2/evaluation/text_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..8a44925fdd7b55ff2867381985e87c99d2cd6a59 --- /dev/null +++ b/src/sts/detectron2/evaluation/text_evaluation.py @@ -0,0 +1,684 @@ +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import re +import torch +from collections import OrderedDict +from fvcore.common.file_io import PathManager +from pycocotools.coco import COCO + +from detectron2.utils import comm +from detectron2.data import MetadataCatalog +from detectron2.evaluation.evaluator import DatasetEvaluator + +import glob +import shutil +from shapely.geometry import Polygon, LinearRing +from detectron2.evaluation import text_eval_script +from detectron2.evaluation import text_eval_script_ic15 +import zipfile +import pickle +import cv2 +import editdistance +class TextEvaluator(DatasetEvaluator): + """ + Evaluate text proposals and recognition. + """ + + def __init__(self, dataset_name, cfg, distributed, output_dir=None): + self._tasks = ("polygon", "recognition") + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + raise AttributeError( + f"json_file was not found in MetaDataCatalog for '{dataset_name}'." 
+ ) + + CTLABELS = [" ","!",'"',"#","$","%","&","'","(",")","*","+",",","-",".","/","0","1","2","3","4","5","6","7","8","9",":",";","<","=",">","?","@","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","[","\\","]","^","_","`","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","{","|","}","~","ˋ","ˊ","﹒","ˀ","˜","ˇ","ˆ","˒","‑",'´', "~"] + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + self.dataset_name = dataset_name + # use dataset_name to decide eval_gt_path + self.lexicon_type = 3 + if "totaltext" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_totaltext.zip" + self._word_spotting = True + self.dataset_name = "totaltext" + elif "ctw1500" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_ctw1500.zip" + self._word_spotting = False + self.dataset_name = "ctw1500" + elif "icdar2015" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_icdar2015.zip" + self._word_spotting = False + self.dataset_name = "icdar2015" + elif "vintext" in dataset_name: + self.lexicon_type = None + self._text_eval_gt_path = "datasets/evaluation/gt_fimotext.zip" + self._word_spotting = True + elif "custom" in dataset_name: + self._text_eval_gt_path = "datasets/evaluation/gt_custom.zip" + self._word_spotting = False + self._text_eval_confidence = cfg.TEST.INFERENCE_TH_TEST + self.nms_enable = cfg.TEST.USE_NMS_IN_TSET + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = self.instances_to_coco_json(instances, input) + self._predictions.append(prediction) + + def to_eval_format(self, file_path, temp_dir="temp_det_results", cf_th=0.5): + def fis_ascii(s): + a = (ord(c) < 128 for c in s) + return all(a) + + def de_ascii(s): + a = [c for c in s if ord(c) < 128] + outa = '' + for i in a: + outa +=i + return outa + + with open(file_path, 'r') as f: + data = json.load(f) + with open('temp_all_det_cors.txt', 'w') as f2: + for ix in range(len(data)): + if data[ix]['score'] > 0.1: + outstr = '{}: '.format(data[ix]['image_id']) + xmin = 1000000 + ymin = 1000000 + xmax = 0 + ymax = 0 + for i in range(len(data[ix]['polys'])): + outstr = outstr + str(int(data[ix]['polys'][i][0])) +','+str(int(data[ix]['polys'][i][1])) +',' + if not "vintext" in self.dataset_name: + ass = de_ascii(data[ix]['rec']) + else: + ass = data[ix]['rec'] + if len(ass)>=0: # + outstr = outstr + str(round(data[ix]['score'], 3)) +',####'+ass+'\n' + f2.writelines(outstr) + f2.close() + dirn = temp_dir + lsc = [cf_th] + fres = open('temp_all_det_cors.txt', 'r').readlines() + for isc in lsc: + if not os.path.isdir(dirn): + os.mkdir(dirn) + + for line in fres: + line = line.strip() + s = line.split(': ') + filename = '{:07d}.txt'.format(int(s[0])) + outName = os.path.join(dirn, filename) + with open(outName, 'a') as fout: + ptr = s[1].strip().split(',####') + score = ptr[0].split(',')[-1] + if float(score) < isc: + continue + cors = ','.join(e for e in ptr[0].split(',')[:-1]) + fout.writelines(cors+',####'+ptr[1]+'\n') + os.remove("temp_all_det_cors.txt") + + def sort_detection(self, temp_dir): + origin_file = temp_dir + output_file = "final_"+temp_dir + output_file_full = 
"full_final_"+temp_dir + if not os.path.isdir(output_file_full): + os.mkdir(output_file_full) + if not os.path.isdir(output_file): + os.mkdir(output_file) + files = glob.glob(origin_file+'*.txt') + files.sort() + if "totaltext" in self.dataset_name: + if not self.lexicon_type == None: + lexicon_path = 'datasets/totaltext/weak_voc_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/totaltext/weak_voc_pair_list.txt', 'r') + pairs = dict() + for line in pair_list.readlines(): + line=line.strip() + word = line.split(' ')[0].upper() + word_gt = line[len(word)+1:] + pairs[word] = word_gt + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + elif "ctw1500" in self.dataset_name: + if not self.lexicon_type == None: + lexicon_path = 'datasets/CTW1500/weak_voc_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/CTW1500/weak_voc_pair_list.txt', 'r') + pairs = dict() + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + pairs[line.upper()] = line + elif "icdar2015" in self.dataset_name: + if self.lexicon_type==1: + # generic lexicon + lexicon_path = 'datasets/icdar2015/GenericVocabulary_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/icdar2015/GenericVocabulary_pair_list.txt', 'r') + pairs = dict() + for line in pair_list.readlines(): + line=line.strip() + word = line.split(' ')[0].upper() + word_gt = line[len(word)+1:] + pairs[word] = word_gt + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + if self.lexicon_type==2: + # weak lexicon + lexicon_path = 'datasets/icdar2015/ch4_test_vocabulary_new.txt' + lexicon_fid=open(lexicon_path, 'r') + pair_list = open('datasets/icdar2015/ch4_test_vocabulary_pair_list.txt', 'r') + pairs = dict() + for line in pair_list.readlines(): + line=line.strip() + word = line.split(' ')[0].upper() + word_gt = line[len(word)+1:] + pairs[word] = word_gt + lexicon_fid=open(lexicon_path, 'r') + lexicon=[] + for line in lexicon_fid.readlines(): + line=line.strip() + lexicon.append(line) + + def find_match_word(rec_str, pairs, lexicon=None): + rec_str = rec_str.upper() + dist_min = 100 + dist_min_pre = 100 + match_word = '' + match_dist = 100 + for word in lexicon: + word = word.upper() + ed = editdistance.eval(rec_str, word) + length_dist = abs(len(word) - len(rec_str)) + dist = ed + if dist 0.3: + if not len(mask.polygons): + continue + if self.nms_enable: + if i not in keep: + i = i+1 + continue + poly = polys[i] + if 'icdar2015' in self.dataset_name: + poly = polygon2rbox(poly, height, width) + poly = np.array(poly) + rec_string = self.decode(rec) + if not len(rec_string): + i = i+1 + continue + result = { + "image_id": img_id, + "category_id": 1, + "polys": poly.tolist(), + "rec": rec_string, + "score": score, + } + results.append(result) + i = i+1 + return results + + def decode(self, rec): + CTLABELS = [" ","!",'"',"#","$","%","&","'","(",")","*","+",",","-",".","/","0","1","2","3","4","5","6","7","8","9",":",";","<","=",">","?","@","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","[","\\","]","^","_","`","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","{","|","}","~","ˋ","ˊ","﹒","ˀ","˜","ˇ","ˆ","˒","‑",'´', "~"] + s = '' + tmp = [] + for i in 
range(len(rec)-1): + if i == 0: + tmp.append(rec[i]) + else: + if rec[i] != rec[i-1]: + tmp.append(rec[i]) + for c in tmp: + c = int(c) + if 0 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = np.array(pts).reshape(-1,2) + pts = pts.tolist() + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. 
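+ # keep only contours with at least 3 points (6 flattened coordinates); shorter ones are degenerate and dropped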
+ res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + +dictionary = "aàáạảãâầấậẩẫăằắặẳẵAÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪeèéẹẻẽêềếệểễEÈÉẸẺẼÊỀẾỆỂỄoòóọỏõôồốộổỗơờớợởỡOÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠiìíịỉĩIÌÍỊỈĨuùúụủũưừứựửữƯỪỨỰỬỮUÙÚỤỦŨyỳýỵỷỹYỲÝỴỶỸ" + + +def make_groups(): + groups = [] + i = 0 + while i < len(dictionary) - 5: + group = [c for c in dictionary[i : i + 6]] + i += 6 + groups.append(group) + return groups + + +groups = make_groups() + +TONES = ["", "ˋ", "ˊ", "﹒", "ˀ", "˜"] +SOURCES = ["ă", "â", "Ă", "Â", "ê", "Ê", "ô", "ơ", "Ô", "Ơ", "ư", "Ư", "Đ", "đ"] +TARGETS = ["aˇ", "aˆ", "Aˇ", "Aˆ", "eˆ", "Eˆ", "oˆ", "o˒", "Oˆ", "O˒", "u˒", "U˒", "D-", "d‑"] + + +def correct_tone_position(word): + word = word[:-1] + if len(word) < 2: + pass + first_ord_char = "" + second_order_char = "" + for char in word: + for group in groups: + if char in group: + second_order_char = first_ord_char + first_ord_char = group[0] + if word[-1] == first_ord_char and second_order_char != "": + pair_chars = ["qu", "Qu", "qU", "QU", "gi", "Gi", "gI", "GI"] + for pair in pair_chars: + if pair in word and second_order_char in ["u", "U", "i", "I"]: + return first_ord_char + return second_order_char + return first_ord_char + + +def vintext_decoder(recognition): + for char in TARGETS: + recognition = recognition.replace(char, SOURCES[TARGETS.index(char)]) + if len(recognition) < 1: + return recognition + if recognition[-1] in TONES: + if len(recognition) < 2: + return recognition + replace_char = correct_tone_position(recognition) + tone = recognition[-1] + recognition = recognition[:-1] + for group in groups: + if replace_char in group: + recognition = recognition.replace(replace_char, group[TONES.index(tone)]) + return recognition diff --git a/src/sts/detectron2/export/README.md b/src/sts/detectron2/export/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9fcd33513fb81ef3aeb4d3c8d9732324dffa2646 --- /dev/null +++ b/src/sts/detectron2/export/README.md @@ -0,0 +1,13 @@ + +This directory contains code to prepare a detectron2 model for deployment. +Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. + +Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. + + +### Acknowledgements + +Thanks to Mobile Vision team at Facebook for developing the Caffe2 conversion tools. + +Thanks to Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who +help export Detectron2 models to TorchScript. 
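+
+### Usage sketch
+
+A minimal sketch of the export path described above, using the `Caffe2Tracer` API defined in `api.py` below. It assumes `cfg` is a detectron2 config whose weights are already loaded into `model`, and that `inputs` is a sample batch in detectron2's input format (used only for tracing); it is an illustration, not the project's own export script:
+
+```python
+from detectron2.export import Caffe2Tracer, add_export_config
+
+cfg = add_export_config(cfg)                # add the caffe2-export options to the config
+tracer = Caffe2Tracer(cfg, model, inputs)   # wrap the model in a caffe2-traceable form
+c2_model = tracer.export_caffe2()           # Caffe2Model (protobuf graph + weights)
+c2_model.save_protobuf("./caffe2_model")    # writes model.pb, model_init.pb, model.pbtxt
+```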
diff --git a/src/sts/detectron2/export/__init__.py b/src/sts/detectron2/export/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..78c27d64fa42760eeacd14d241cf28d58e3da490 --- /dev/null +++ b/src/sts/detectron2/export/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +from .api import * +from .flatten import TracingAdapter +from .torchscript import scripting_with_instances, dump_torchscript_IR + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/detectron2/export/api.py b/src/sts/detectron2/export/api.py new file mode 100644 index 0000000000000000000000000000000000000000..e80989231ea5233e40f48a76e375a5a3c39208b1 --- /dev/null +++ b/src/sts/detectron2/export/api.py @@ -0,0 +1,273 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import os +import torch +from caffe2.proto import caffe2_pb2 +from torch import nn + +from detectron2.config import CfgNode +from detectron2.utils.file_io import PathManager + +from .caffe2_inference import ProtobufDetectionModel +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph + +__all__ = [ + "add_export_config", + "export_caffe2_model", + "Caffe2Model", + "export_onnx_model", + "Caffe2Tracer", +] + + +def add_export_config(cfg): + """ + Add options needed by caffe2 export. + + Args: + cfg (CfgNode): a detectron2 config + + Returns: + CfgNode: + an updated config with new options that will be used by :class:`Caffe2Tracer`. + """ + is_frozen = cfg.is_frozen() + cfg.defrost() + cfg.EXPORT_CAFFE2 = CfgNode() + cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT = False + if is_frozen: + cfg.freeze() + return cfg + + +class Caffe2Tracer: + """ + Make a detectron2 model traceable with Caffe2 operators. + This class creates a traceable version of a detectron2 model which: + + 1. Rewrite parts of the model using ops in Caffe2. Note that some ops do + not have GPU implementation in Caffe2. + 2. Remove post-processing and only produce raw layer outputs + + After making a traceable model, the class provide methods to export such a + model to different deployment formats. + Exported graph produced by this class take two input tensors: + + 1. (1, C, H, W) float "data" which is an image (usually in [0, 255]). + (H, W) often has to be padded to multiple of 32 (depend on the model + architecture). + 2. 1x3 float "im_info", each row of which is (height, width, 1.0). + Height and width are true image shapes before padding. + + The class currently only supports models using builtin meta architectures. + Batch inference is not supported, and contributions are welcome. + """ + + def __init__(self, cfg: CfgNode, model: nn.Module, inputs): + """ + Args: + cfg (CfgNode): a detectron2 config, with extra export-related options + added by :func:`add_export_config`. It's used to construct + caffe2-compatible model. + model (nn.Module): An original pytorch model. Must be among a few official models + in detectron2 that can be converted to become caffe2-compatible automatically. + Weights have to be already loaded to this model. + inputs: sample inputs that the given model takes for inference. + Will be used to trace the model. For most models, random inputs with + no detected objects will not work as they lead to wrong traces. 
+ """ + assert isinstance(cfg, CfgNode), cfg + assert isinstance(model, torch.nn.Module), type(model) + + if "EXPORT_CAFFE2" not in cfg: + cfg = add_export_config(cfg) # will just the defaults + # TODO make it support custom models, by passing in c2 model directly + C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE] + self.traceable_model = C2MetaArch(cfg, copy.deepcopy(model)) + self.inputs = inputs + self.traceable_inputs = self.traceable_model.get_caffe2_inputs(inputs) + + def export_caffe2(self): + """ + Export the model to Caffe2's protobuf format. + The returned object can be saved with its :meth:`.save_protobuf()` method. + The result can be loaded and executed using Caffe2 runtime. + + Returns: + :class:`Caffe2Model` + """ + from .caffe2_export import export_caffe2_detection_model + + predict_net, init_net = export_caffe2_detection_model( + self.traceable_model, self.traceable_inputs + ) + return Caffe2Model(predict_net, init_net) + + def export_onnx(self): + """ + Export the model to ONNX format. + Note that the exported model contains custom ops only available in caffe2, therefore it + cannot be directly executed by other runtime (such as onnxruntime or TensorRT). + Post-processing or transformation passes may be applied on the model to accommodate + different runtimes, but we currently do not provide support for them. + + Returns: + onnx.ModelProto: an onnx model. + """ + from .caffe2_export import export_onnx_model as export_onnx_model_impl + + return export_onnx_model_impl(self.traceable_model, (self.traceable_inputs,)) + + def export_torchscript(self): + """ + Export the model to a ``torch.jit.TracedModule`` by tracing. + The returned object can be saved to a file by ``.save()``. + + Returns: + torch.jit.TracedModule: a torch TracedModule + """ + logger = logging.getLogger(__name__) + logger.info("Tracing the model with torch.jit.trace ...") + with torch.no_grad(): + return torch.jit.trace(self.traceable_model, (self.traceable_inputs,)) + + +class Caffe2Model(nn.Module): + """ + A wrapper around the traced model in Caffe2's protobuf format. + The exported graph has different inputs/outputs from the original Pytorch + model, as explained in :class:`Caffe2Tracer`. This class wraps around the + exported graph to simulate the same interface as the original Pytorch model. + It also provides functions to save/load models in Caffe2's format.' + + Examples: + :: + c2_model = Caffe2Tracer(cfg, torch_model, inputs).export_caffe2() + inputs = [{"image": img_tensor_CHW}] + outputs = c2_model(inputs) + orig_outputs = torch_model(inputs) + """ + + def __init__(self, predict_net, init_net): + super().__init__() + self.eval() # always in eval mode + self._predict_net = predict_net + self._init_net = init_net + self._predictor = None + + __init__.__HIDE_SPHINX_DOC__ = True + + @property + def predict_net(self): + """ + caffe2.core.Net: the underlying caffe2 predict net + """ + return self._predict_net + + @property + def init_net(self): + """ + caffe2.core.Net: the underlying caffe2 init net + """ + return self._init_net + + def save_protobuf(self, output_dir): + """ + Save the model as caffe2's protobuf format. + It saves the following files: + + * "model.pb": definition of the graph. Can be visualized with + tools like `netron `_. + * "model_init.pb": model parameters + * "model.pbtxt": human-readable definition of the graph. Not + needed for deployment. + + Args: + output_dir (str): the output directory to save protobuf files. 
+ """ + logger = logging.getLogger(__name__) + logger.info("Saving model to {} ...".format(output_dir)) + if not PathManager.exists(output_dir): + PathManager.mkdirs(output_dir) + + with PathManager.open(os.path.join(output_dir, "model.pb"), "wb") as f: + f.write(self._predict_net.SerializeToString()) + with PathManager.open(os.path.join(output_dir, "model.pbtxt"), "w") as f: + f.write(str(self._predict_net)) + with PathManager.open(os.path.join(output_dir, "model_init.pb"), "wb") as f: + f.write(self._init_net.SerializeToString()) + + def save_graph(self, output_file, inputs=None): + """ + Save the graph as SVG format. + + Args: + output_file (str): a SVG file + inputs: optional inputs given to the model. + If given, the inputs will be used to run the graph to record + shape of every tensor. The shape information will be + saved together with the graph. + """ + from .caffe2_export import run_and_save_graph + + if inputs is None: + save_graph(self._predict_net, output_file, op_only=False) + else: + size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0) + device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii") + inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device) + inputs = [x.cpu().numpy() for x in inputs] + run_and_save_graph(self._predict_net, self._init_net, inputs, output_file) + + @staticmethod + def load_protobuf(dir): + """ + Args: + dir (str): a directory used to save Caffe2Model with + :meth:`save_protobuf`. + The files "model.pb" and "model_init.pb" are needed. + + Returns: + Caffe2Model: the caffe2 model loaded from this directory. + """ + predict_net = caffe2_pb2.NetDef() + with PathManager.open(os.path.join(dir, "model.pb"), "rb") as f: + predict_net.ParseFromString(f.read()) + + init_net = caffe2_pb2.NetDef() + with PathManager.open(os.path.join(dir, "model_init.pb"), "rb") as f: + init_net.ParseFromString(f.read()) + + return Caffe2Model(predict_net, init_net) + + def __call__(self, inputs): + """ + An interface that wraps around a Caffe2 model and mimics detectron2's models' + input/output format. See details about the format at :doc:`/tutorials/models`. + This is used to compare the outputs of caffe2 model with its original torch model. + + Due to the extra conversion between Pytorch/Caffe2, this method is not meant for + benchmark. Because of the conversion, this method also has dependency + on detectron2 in order to convert to detectron2's output format. + """ + if self._predictor is None: + self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net) + return self._predictor(inputs) + + +def export_caffe2_model(cfg, model, inputs): + logger = logging.getLogger(__name__) + logger.warning( + "export_caffe2_model() is deprecated. Please use `Caffe2Tracer().export_caffe2() instead." + ) + return Caffe2Tracer(cfg, model, inputs).export_caffe2() + + +def export_onnx_model(cfg, model, inputs): + logger = logging.getLogger(__name__) + logger.warning( + "export_caffe2_model() is deprecated. Please use `Caffe2Tracer().export_onnx() instead." + ) + return Caffe2Tracer(cfg, model, inputs).export_onnx() diff --git a/src/sts/detectron2/export/c10.py b/src/sts/detectron2/export/c10.py new file mode 100644 index 0000000000000000000000000000000000000000..d8c52d45964fdbffa52648439f6b82c6b8b3c219 --- /dev/null +++ b/src/sts/detectron2/export/c10.py @@ -0,0 +1,527 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+ +import math +import torch +import torch.nn.functional as F + +from detectron2.layers import cat +from detectron2.layers.roi_align_rotated import ROIAlignRotated +from detectron2.modeling import poolers +from detectron2.modeling.proposal_generator import rpn +from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference +from detectron2.structures import Boxes, ImageList, Instances, Keypoints + +from .shared import alias, to_device + + +""" +This file contains caffe2-compatible implementation of several detectron2 components. +""" + + +class Caffe2Boxes(Boxes): + """ + Representing a list of detectron2.structures.Boxes from minibatch, each box + is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector + (batch index + 5 coordinates) for RotatedBoxes. + """ + + def __init__(self, tensor): + assert isinstance(tensor, torch.Tensor) + assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size() + # TODO: make tensor immutable when dim is Nx5 for Boxes, + # and Nx6 for RotatedBoxes? + self.tensor = tensor + + +# TODO clean up this class, maybe just extend Instances +class InstancesList(object): + """ + Tensor representation of a list of Instances object for a batch of images. + + When dealing with a batch of images with Caffe2 ops, a list of bboxes + (instances) are usually represented by single Tensor with size + (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is + for providing common functions to convert between these two representations. + """ + + def __init__(self, im_info, indices, extra_fields=None): + # [N, 3] -> (H, W, Scale) + self.im_info = im_info + # [N,] -> indice of batch to which the instance belongs + self.indices = indices + # [N, ...] + self.batch_extra_fields = extra_fields or {} + + self.image_size = self.im_info + + def get_fields(self): + """like `get_fields` in the Instances object, + but return each field in tensor representations""" + ret = {} + for k, v in self.batch_extra_fields.items(): + # if isinstance(v, torch.Tensor): + # tensor_rep = v + # elif isinstance(v, (Boxes, Keypoints)): + # tensor_rep = v.tensor + # else: + # raise ValueError("Can't find tensor representation for: {}".format()) + ret[k] = v + return ret + + def has(self, name): + return name in self.batch_extra_fields + + def set(self, name, value): + data_len = len(value) + if len(self.batch_extra_fields): + assert ( + len(self) == data_len + ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) + self.batch_extra_fields[name] = value + + def __setattr__(self, name, val): + if name in ["im_info", "indices", "batch_extra_fields", "image_size"]: + super().__setattr__(name, val) + else: + self.set(name, val) + + def __getattr__(self, name): + if name not in self.batch_extra_fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self.batch_extra_fields[name] + + def __len__(self): + return len(self.indices) + + def flatten(self): + ret = [] + for _, v in self.batch_extra_fields.items(): + if isinstance(v, (Boxes, Keypoints)): + ret.append(v.tensor) + else: + ret.append(v) + return ret + + @staticmethod + def to_d2_instances_list(instances_list): + """ + Convert InstancesList to List[Instances]. The input `instances_list` can + also be a List[Instances], in this case this method is a non-op. 
+ """ + if not isinstance(instances_list, InstancesList): + assert all(isinstance(x, Instances) for x in instances_list) + return instances_list + + ret = [] + for i, info in enumerate(instances_list.im_info): + instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())])) + + ids = instances_list.indices == i + for k, v in instances_list.batch_extra_fields.items(): + if isinstance(v, torch.Tensor): + instances.set(k, v[ids]) + continue + elif isinstance(v, Boxes): + instances.set(k, v[ids, -4:]) + continue + + target_type, tensor_source = v + assert isinstance(tensor_source, torch.Tensor) + assert tensor_source.shape[0] == instances_list.indices.shape[0] + tensor_source = tensor_source[ids] + + if issubclass(target_type, Boxes): + instances.set(k, Boxes(tensor_source[:, -4:])) + elif issubclass(target_type, Keypoints): + instances.set(k, Keypoints(tensor_source)) + elif issubclass(target_type, torch.Tensor): + instances.set(k, tensor_source) + else: + raise ValueError("Can't handle targe type: {}".format(target_type)) + + ret.append(instances) + return ret + + +class Caffe2Compatible(object): + """ + A model can inherit this class to indicate that it can be traced and deployed with caffe2. + """ + + def _get_tensor_mode(self): + return self._tensor_mode + + def _set_tensor_mode(self, v): + self._tensor_mode = v + + tensor_mode = property(_get_tensor_mode, _set_tensor_mode) + """ + If true, the model expects C2-style tensor only inputs/outputs format. + """ + + +class Caffe2RPN(Caffe2Compatible, rpn.RPN): + def _generate_proposals( + self, images, objectness_logits_pred, anchor_deltas_pred, gt_instances=None + ): + assert isinstance(images, ImageList) + if self.tensor_mode: + im_info = images.image_sizes + else: + im_info = torch.tensor([[im_sz[0], im_sz[1], 1.0] for im_sz in images.image_sizes]).to( + images.tensor.device + ) + assert isinstance(im_info, torch.Tensor) + + rpn_rois_list = [] + rpn_roi_probs_list = [] + for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip( + objectness_logits_pred, + anchor_deltas_pred, + iter(self.anchor_generator.cell_anchors), + self.anchor_generator.strides, + ): + scores = scores.detach() + bbox_deltas = bbox_deltas.detach() + + rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals( + scores, + bbox_deltas, + im_info, + cell_anchors_tensor, + spatial_scale=1.0 / feat_stride, + pre_nms_topN=self.pre_nms_topk[self.training], + post_nms_topN=self.post_nms_topk[self.training], + nms_thresh=self.nms_thresh, + min_size=self.min_box_size, + # correct_transform_coords=True, # deprecated argument + angle_bound_on=True, # Default + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, # Default + legacy_plus_one=False, + ) + rpn_rois_list.append(rpn_rois) + rpn_roi_probs_list.append(rpn_roi_probs) + + # For FPN in D2, in RPN all proposals from different levels are concated + # together, ranked and picked by top post_nms_topk. Then in ROIPooler + # it calculates level_assignments and calls the RoIAlign from + # the corresponding level. + + if len(objectness_logits_pred) == 1: + rpn_rois = rpn_rois_list[0] + rpn_roi_probs = rpn_roi_probs_list[0] + else: + assert len(rpn_rois_list) == len(rpn_roi_probs_list) + rpn_post_nms_topN = self.post_nms_topk[self.training] + + device = rpn_rois_list[0].device + input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)] + + # TODO remove this after confirming rpn_max_level/rpn_min_level + # is not needed in CollectRpnProposals. 
+ feature_strides = list(self.anchor_generator.strides) + rpn_min_level = int(math.log2(feature_strides[0])) + rpn_max_level = int(math.log2(feature_strides[-1])) + assert (rpn_max_level - rpn_min_level + 1) == len( + rpn_rois_list + ), "CollectRpnProposals requires continuous levels" + + rpn_rois = torch.ops._caffe2.CollectRpnProposals( + input_list, + # NOTE: in current implementation, rpn_max_level and rpn_min_level + # are not needed, only the subtraction of two matters and it + # can be infer from the number of inputs. Keep them now for + # consistency. + rpn_max_level=2 + len(rpn_rois_list) - 1, + rpn_min_level=2, + rpn_post_nms_topN=rpn_post_nms_topN, + ) + rpn_rois = to_device(rpn_rois, device) + rpn_roi_probs = [] + + proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode) + return proposals, {} + + def forward(self, images, features, gt_instances=None): + assert not self.training + features = [features[f] for f in self.in_features] + objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features) + return self._generate_proposals( + images, + objectness_logits_pred, + anchor_deltas_pred, + gt_instances, + ) + + @staticmethod + def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode): + proposals = InstancesList( + im_info=im_info, + indices=rpn_rois[:, 0], + extra_fields={ + "proposal_boxes": Caffe2Boxes(rpn_rois), + "objectness_logits": (torch.Tensor, rpn_roi_probs), + }, + ) + if not tensor_mode: + proposals = InstancesList.to_d2_instances_list(proposals) + else: + proposals = [proposals] + return proposals + + +class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler): + @staticmethod + def c2_preprocess(box_lists): + assert all(isinstance(x, Boxes) for x in box_lists) + if all(isinstance(x, Caffe2Boxes) for x in box_lists): + # input is pure-tensor based + assert len(box_lists) == 1 + pooler_fmt_boxes = box_lists[0].tensor + else: + pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists) + return pooler_fmt_boxes + + def forward(self, x, box_lists): + assert not self.training + + pooler_fmt_boxes = self.c2_preprocess(box_lists) + num_level_assignments = len(self.level_poolers) + + if num_level_assignments == 1: + if isinstance(self.level_poolers[0], ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = torch.ops._caffe2.RoIAlign + aligned = self.level_poolers[0].aligned + + out = c2_roi_align( + x[0], + pooler_fmt_boxes, + order="NCHW", + spatial_scale=float(self.level_poolers[0].spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(self.level_poolers[0].sampling_ratio), + aligned=aligned, + ) + return out + + device = pooler_fmt_boxes.device + assert ( + self.max_level - self.min_level + 1 == 4 + ), "Currently DistributeFpnProposals only support 4 levels" + fpn_outputs = torch.ops._caffe2.DistributeFpnProposals( + to_device(pooler_fmt_boxes, "cpu"), + roi_canonical_scale=self.canonical_box_size, + roi_canonical_level=self.canonical_level, + roi_max_level=self.max_level, + roi_min_level=self.min_level, + legacy_plus_one=False, + ) + fpn_outputs = [to_device(x, device) for x in fpn_outputs] + + rois_fpn_list = fpn_outputs[:-1] + rois_idx_restore_int32 = fpn_outputs[-1] + + roi_feat_fpn_list = [] + for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers): + if isinstance(pooler, ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = 
torch.ops._caffe2.RoIAlign + aligned = bool(pooler.aligned) + + roi_feat_fpn = c2_roi_align( + x_level, + roi_fpn, + order="NCHW", + spatial_scale=float(pooler.spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(pooler.sampling_ratio), + aligned=aligned, + ) + roi_feat_fpn_list.append(roi_feat_fpn) + + roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0) + assert roi_feat_shuffled.numel() > 0 and rois_idx_restore_int32.numel() > 0, ( + "Caffe2 export requires tracing with a model checkpoint + input that can produce valid" + " detections. But no detections were obtained with the given checkpoint and input!" + ) + roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32) + return roi_feat + + +class Caffe2FastRCNNOutputsInference: + def __init__(self, tensor_mode): + self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode + + def __call__(self, box_predictor, predictions, proposals): + """ equivalent to FastRCNNOutputLayers.inference """ + num_classes = box_predictor.num_classes + score_thresh = box_predictor.test_score_thresh + nms_thresh = box_predictor.test_nms_thresh + topk_per_image = box_predictor.test_topk_per_image + is_rotated = len(box_predictor.box2box_transform.weights) == 5 + + if is_rotated: + box_dim = 5 + assert box_predictor.box2box_transform.weights[4] == 1, ( + "The weights for Rotated BBoxTransform in C2 have only 4 dimensions," + + " thus enforcing the angle weight to be 1 for now" + ) + box2box_transform_weights = box_predictor.box2box_transform.weights[:4] + else: + box_dim = 4 + box2box_transform_weights = box_predictor.box2box_transform.weights + + class_logits, box_regression = predictions + if num_classes + 1 == class_logits.shape[1]: + class_prob = F.softmax(class_logits, -1) + else: + assert num_classes == class_logits.shape[1] + class_prob = F.sigmoid(class_logits) + # BoxWithNMSLimit will infer num_classes from the shape of the class_prob + # So append a zero column as placeholder for the background class + class_prob = torch.cat((class_prob, torch.zeros(class_prob.shape[0], 1)), dim=1) + + assert box_regression.shape[1] % box_dim == 0 + cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1 + + input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1 + + rois = type(proposals[0].proposal_boxes).cat([p.proposal_boxes for p in proposals]) + device, dtype = rois.tensor.device, rois.tensor.dtype + if input_tensor_mode: + im_info = proposals[0].image_size + rois = rois.tensor + else: + im_info = torch.tensor( + [[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]] + ) + batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(len(p) for p in proposals) + ], + dim=0, + ) + rois = torch.cat([batch_ids, rois.tensor], dim=1) + + roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform( + to_device(rois, "cpu"), + to_device(box_regression, "cpu"), + to_device(im_info, "cpu"), + weights=box2box_transform_weights, + apply_scale=True, + rotated=is_rotated, + angle_bound_on=True, + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, + legacy_plus_one=False, + ) + roi_pred_bbox = to_device(roi_pred_bbox, device) + roi_batch_splits = to_device(roi_batch_splits, device) + + nms_outputs = torch.ops._caffe2.BoxWithNMSLimit( + to_device(class_prob, "cpu"), + to_device(roi_pred_bbox, "cpu"), + to_device(roi_batch_splits, "cpu"), + score_thresh=float(score_thresh), + 
nms=float(nms_thresh), + detections_per_im=int(topk_per_image), + soft_nms_enabled=False, + soft_nms_method="linear", + soft_nms_sigma=0.5, + soft_nms_min_score_thres=0.001, + rotated=is_rotated, + cls_agnostic_bbox_reg=cls_agnostic_bbox_reg, + input_boxes_include_bg_cls=False, + output_classes_include_bg_cls=False, + legacy_plus_one=False, + ) + roi_score_nms = to_device(nms_outputs[0], device) + roi_bbox_nms = to_device(nms_outputs[1], device) + roi_class_nms = to_device(nms_outputs[2], device) + roi_batch_splits_nms = to_device(nms_outputs[3], device) + roi_keeps_nms = to_device(nms_outputs[4], device) + roi_keeps_size_nms = to_device(nms_outputs[5], device) + if not self.tensor_mode: + roi_class_nms = roi_class_nms.to(torch.int64) + + roi_batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms) + ], + dim=0, + ) + + roi_class_nms = alias(roi_class_nms, "class_nms") + roi_score_nms = alias(roi_score_nms, "score_nms") + roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms") + roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms") + roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms") + roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms") + + results = InstancesList( + im_info=im_info, + indices=roi_batch_ids[:, 0], + extra_fields={ + "pred_boxes": Caffe2Boxes(roi_bbox_nms), + "scores": roi_score_nms, + "pred_classes": roi_class_nms, + }, + ) + + if not self.tensor_mode: + results = InstancesList.to_d2_instances_list(results) + batch_splits = roi_batch_splits_nms.int().tolist() + kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits)) + else: + results = [results] + kept_indices = [roi_keeps_nms] + + return results, kept_indices + + +class Caffe2MaskRCNNInference: + def __call__(self, pred_mask_logits, pred_instances): + """ equivalent to mask_head.mask_rcnn_inference """ + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + mask_probs_pred = pred_mask_logits.sigmoid() + mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs") + pred_instances[0].pred_masks = mask_probs_pred + else: + mask_rcnn_inference(pred_mask_logits, pred_instances) + + +class Caffe2KeypointRCNNInference: + def __init__(self, use_heatmap_max_keypoint): + self.use_heatmap_max_keypoint = use_heatmap_max_keypoint + + def __call__(self, pred_keypoint_logits, pred_instances): + # just return the keypoint heatmap for now, + # there will be option to call HeatmapMaxKeypointOp + output = alias(pred_keypoint_logits, "kps_score") + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + if self.use_heatmap_max_keypoint: + device = output.device + output = torch.ops._caffe2.HeatmapMaxKeypoint( + to_device(output, "cpu"), + pred_instances[0].pred_boxes.tensor, + should_output_softmax=True, # worth make it configerable? + ) + output = to_device(output, device) + output = alias(output, "keypoints_out") + pred_instances[0].pred_keypoints = output + return pred_keypoint_logits diff --git a/src/sts/detectron2/export/caffe2_export.py b/src/sts/detectron2/export/caffe2_export.py new file mode 100644 index 0000000000000000000000000000000000000000..74ac123a7aed6cd77d6d833446a831d9048745b2 --- /dev/null +++ b/src/sts/detectron2/export/caffe2_export.py @@ -0,0 +1,207 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
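For reference, a hedged illustration (values made up) of the caffe2-style box layout that the `c10.py` ops above consume: `Caffe2Boxes` stores one row per box as `(batch_index, x1, y1, x2, y2)`, and `Caffe2ROIPooler.c2_preprocess()` returns exactly this kind of tensor when the inputs are already pure-tensor `Caffe2Boxes`.

```python
import torch

# Two boxes on image 0 and one box on image 1 of a batch.
pooler_fmt_boxes = torch.tensor(
    [
        [0.0, 10.0, 10.0, 50.0, 80.0],
        [0.0, 20.0, 15.0, 60.0, 90.0],
        [1.0,  5.0,  5.0, 30.0, 40.0],
    ]
)
```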
+ +import copy +import io +import logging +import numpy as np +from typing import List +import onnx +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python.onnx.backend import Caffe2Backend +from tabulate import tabulate +from termcolor import colored +from torch.onnx import OperatorExportTypes + +from .shared import ( + ScopedWS, + construct_init_net_from_params, + fuse_alias_placeholder, + fuse_copy_between_cpu_and_gpu, + get_params_from_init_net, + group_norm_replace_aten_with_caffe2, + infer_device_type, + remove_dead_end_ops, + remove_reshape_for_fc, + save_graph, +) + +logger = logging.getLogger(__name__) + + +def export_onnx_model(model, inputs): + """ + Trace and export a model to onnx format. + + Args: + model (nn.Module): + inputs (tuple[args]): the model will be called by `model(*inputs)` + + Returns: + an onnx model + """ + assert isinstance(model, torch.nn.Module) + + # make sure all modules are in eval mode, onnx may change the training state + # of the module if the states are not consistent + def _check_eval(module): + assert not module.training + + model.apply(_check_eval) + + # Export the model to ONNX + with torch.no_grad(): + with io.BytesIO() as f: + torch.onnx.export( + model, + inputs, + f, + operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK, + # verbose=True, # NOTE: uncomment this for debugging + # export_params=True, + ) + onnx_model = onnx.load_from_string(f.getvalue()) + + # Apply ONNX's Optimization + all_passes = onnx.optimizer.get_available_passes() + passes = ["fuse_bn_into_conv"] + assert all(p in all_passes for p in passes) + onnx_model = onnx.optimizer.optimize(onnx_model, passes) + return onnx_model + + +def _op_stats(net_def): + type_count = {} + for t in [op.type for op in net_def.op]: + type_count[t] = type_count.get(t, 0) + 1 + type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet + type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count + return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list) + + +def _assign_device_option( + predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor] +): + """ + ONNX exported network doesn't have concept of device, assign necessary + device option for each op in order to make it runable on GPU runtime. 
+ """ + + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + def _assign_op_device_option(net_proto, net_ssa, blob_device_types): + for op, ssa_i in zip(net_proto.op, net_ssa): + if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]: + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + else: + devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]] + assert all(d == devices[0] for d in devices) + if devices[0] == "cuda": + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + + # update ops in predict_net + predict_net_input_device_types = { + (name, 0): _get_device_type(tensor) + for name, tensor in zip(predict_net.external_input, tensor_inputs) + } + predict_net_device_types = infer_device_type( + predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch" + ) + predict_net_ssa, _ = core.get_ssa(predict_net) + _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types) + + # update ops in init_net + init_net_ssa, versions = core.get_ssa(init_net) + init_net_output_device_types = { + (name, versions[name]): predict_net_device_types[(name, 0)] + for name in init_net.external_output + } + init_net_device_types = infer_device_type( + init_net, known_status=init_net_output_device_types, device_name_style="pytorch" + ) + _assign_op_device_option(init_net, init_net_ssa, init_net_device_types) + + +def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]): + """ + Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX. + + Arg: + model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py + tensor_inputs: a list of tensors that caffe2 model takes as input. + """ + model = copy.deepcopy(model) + assert isinstance(model, torch.nn.Module) + assert hasattr(model, "encode_additional_info") + + # Export via ONNX + logger.info( + "Exporting a {} model via ONNX ...".format(type(model).__name__) + + " Some warnings from ONNX are expected and are usually not to worry about." + ) + onnx_model = export_onnx_model(model, (tensor_inputs,)) + # Convert ONNX model to Caffe2 protobuf + init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) + ops_table = [[op.type, op.input, op.output] for op in predict_net.op] + table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe") + logger.info( + "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan") + ) + + # Apply protobuf optimization + fuse_alias_placeholder(predict_net, init_net) + if any(t.device.type != "cpu" for t in tensor_inputs): + fuse_copy_between_cpu_and_gpu(predict_net) + remove_dead_end_ops(init_net) + _assign_device_option(predict_net, init_net, tensor_inputs) + params, device_options = get_params_from_init_net(init_net) + predict_net, params = remove_reshape_for_fc(predict_net, params) + init_net = construct_init_net_from_params(params, device_options) + group_norm_replace_aten_with_caffe2(predict_net) + + # Record necessary information for running the pb model in Detectron2 system. 
+ model.encode_additional_info(predict_net, init_net) + + logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net))) + logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net))) + + return predict_net, init_net + + +def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path): + """ + Run the caffe2 model on given inputs, recording the shape and draw the graph. + + predict_net/init_net: caffe2 model. + tensor_inputs: a list of tensors that caffe2 model takes as input. + graph_save_path: path for saving graph of exported model. + """ + + logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False) + + # Run the exported Caffe2 net + logger.info("Running ONNX exported model ...") + with ScopedWS("__ws_tmp__", True) as ws: + ws.RunNetOnce(init_net) + initialized_blobs = set(ws.Blobs()) + uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs] + for name, blob in zip(uninitialized, tensor_inputs): + ws.FeedBlob(name, blob) + + try: + ws.RunNetOnce(predict_net) + except RuntimeError as e: + logger.warning("Encountered RuntimeError: \n{}".format(str(e))) + + ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()} + blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)} + + logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes) + + return ws_blobs diff --git a/src/sts/detectron2/export/caffe2_inference.py b/src/sts/detectron2/export/caffe2_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..deb886c0417285ed1d5ad85eb941fa1ac757cdab --- /dev/null +++ b/src/sts/detectron2/export/caffe2_inference.py @@ -0,0 +1,161 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +import numpy as np +from itertools import count +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core + +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type + +logger = logging.getLogger(__name__) + + +# ===== ref: mobile-vision predictor's 'Caffe2Wrapper' class ====== +class ProtobufModel(torch.nn.Module): + """ + Wrapper of a caffe2's protobuf model. + It works just like nn.Module, but running caffe2 under the hood. + Input/Output are tuple[tensor] that match the caffe2 net's external_input/output. 
+ """ + + _ids = count(0) + + def __init__(self, predict_net, init_net): + logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...") + super().__init__() + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + # create unique temporary workspace for each instance + self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids)) + self.net = core.Net(predict_net) + + logger.info("Running init_net once to fill the parameters ...") + with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: + ws.RunNetOnce(init_net) + uninitialized_external_input = [] + for blob in self.net.Proto().external_input: + if blob not in ws.Blobs(): + uninitialized_external_input.append(blob) + ws.CreateBlob(blob) + ws.CreateNet(self.net) + + self._error_msgs = set() + self._input_blobs = uninitialized_external_input + + def _infer_output_devices(self, inputs): + """ + Returns: + list[str]: list of device for each external output + """ + + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + predict_net = self.net.Proto() + input_device_types = { + (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs) + } + device_type_map = infer_device_type( + predict_net, known_status=input_device_types, device_name_style="pytorch" + ) + ssa, versions = core.get_ssa(predict_net) + versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] + output_devices = [device_type_map[outp] for outp in versioned_outputs] + return output_devices + + def forward(self, inputs): + """ + Args: + inputs (tuple[torch.Tensor]) + + Returns: + tuple[torch.Tensor] + """ + assert len(inputs) == len(self._input_blobs), ( + f"Length of inputs ({len(inputs)}) " + f"doesn't match the required input blobs: {self._input_blobs}" + ) + + with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: + for b, tensor in zip(self._input_blobs, inputs): + ws.FeedBlob(b, tensor) + + try: + ws.RunNet(self.net.Proto().name) + except RuntimeError as e: + if not str(e) in self._error_msgs: + self._error_msgs.add(str(e)) + logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) + logger.warning("Catch the error and use partial results.") + + c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output] + # Remove outputs of current run, this is necessary in order to + # prevent fetching the result from previous run if the model fails + # in the middle. + for b in self.net.Proto().external_output: + # Needs to create uninitialized blob to make the net runable. + # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), + # but there'no such API. 
+ ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).") + + # Cast output to torch.Tensor on the desired device + output_devices = ( + self._infer_output_devices(inputs) + if any(t.device.type != "cpu" for t in inputs) + else ["cpu" for _ in self.net.Proto().external_output] + ) + + outputs = [] + for name, c2_output, device in zip( + self.net.Proto().external_output, c2_outputs, output_devices + ): + if not isinstance(c2_output, np.ndarray): + raise RuntimeError( + "Invalid output for blob {}, received: {}".format(name, c2_output) + ) + outputs.append(torch.tensor(c2_output).to(device=device)) + return tuple(outputs) + + +class ProtobufDetectionModel(torch.nn.Module): + """ + A class works just like a pytorch meta arch in terms of inference, but running + caffe2 model under the hood. + """ + + def __init__(self, predict_net, init_net, *, convert_outputs=None): + """ + Args: + predict_net, init_net (core.Net): caffe2 nets + convert_outptus (callable): a function that converts caffe2 + outputs to the same format of the original pytorch model. + By default, use the one defined in the caffe2 meta_arch. + """ + super().__init__() + self.protobuf_model = ProtobufModel(predict_net, init_net) + self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) + self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") + + if convert_outputs is None: + meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") + meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] + self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) + else: + self._convert_outputs = convert_outputs + + def _convert_inputs(self, batched_inputs): + # currently all models convert inputs in the same way + return convert_batched_inputs_to_c2_format( + batched_inputs, self.size_divisibility, self.device + ) + + def forward(self, batched_inputs): + c2_inputs = self._convert_inputs(batched_inputs) + c2_results = self.protobuf_model(c2_inputs) + c2_results = dict(zip(self.protobuf_model.net.Proto().external_output, c2_results)) + return self._convert_outputs(batched_inputs, c2_inputs, c2_results) diff --git a/src/sts/detectron2/export/caffe2_modeling.py b/src/sts/detectron2/export/caffe2_modeling.py new file mode 100644 index 0000000000000000000000000000000000000000..7a9fc78164c32f6709245d3a456af19ffde7c497 --- /dev/null +++ b/src/sts/detectron2/export/caffe2_modeling.py @@ -0,0 +1,503 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+ +import functools +import io +import struct +import types +import torch + +from detectron2.modeling import meta_arch +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.meta_arch.panoptic_fpn import combine_semantic_and_instance_outputs +from detectron2.modeling.meta_arch.retinanet import permute_to_N_HWA_K +from detectron2.modeling.postprocessing import detector_postprocess, sem_seg_postprocess +from detectron2.modeling.roi_heads import keypoint_head +from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes + +from .c10 import Caffe2Compatible +from .caffe2_patch import ROIHeadsPatcher, patch_generalized_rcnn +from .shared import ( + alias, + check_set_pb_arg, + get_pb_arg_floats, + get_pb_arg_valf, + get_pb_arg_vali, + get_pb_arg_vals, + mock_torch_nn_functional_interpolate, +) + + +def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): + """ + A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor]) + to detectron2's format (i.e. list of Instances instance). + This only works when the model follows the Caffe2 detectron's naming convention. + + Args: + image_sizes (List[List[int, int]]): [H, W] of every image. + tensor_outputs (Dict[str, Tensor]): external_output to its tensor. + + force_mask_on (Bool): if true, the it make sure there'll be pred_masks even + if the mask is not found from tensor_outputs (usually due to model crash) + """ + + results = [Instances(image_size) for image_size in image_sizes] + + batch_splits = tensor_outputs.get("batch_splits", None) + if batch_splits: + raise NotImplementedError() + assert len(image_sizes) == 1 + result = results[0] + + bbox_nms = tensor_outputs["bbox_nms"] + score_nms = tensor_outputs["score_nms"] + class_nms = tensor_outputs["class_nms"] + # Detection will always success because Conv support 0-batch + assert bbox_nms is not None + assert score_nms is not None + assert class_nms is not None + if bbox_nms.shape[1] == 5: + result.pred_boxes = RotatedBoxes(bbox_nms) + else: + result.pred_boxes = Boxes(bbox_nms) + result.scores = score_nms + result.pred_classes = class_nms.to(torch.int64) + + mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) + if mask_fcn_probs is not None: + # finish the mask pred + mask_probs_pred = mask_fcn_probs + num_masks = mask_probs_pred.shape[0] + class_pred = result.pred_classes + indices = torch.arange(num_masks, device=class_pred.device) + mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] + result.pred_masks = mask_probs_pred + elif force_mask_on: + # NOTE: there's no way to know the height/width of mask here, it won't be + # used anyway when batch size is 0, so just set them to 0. + result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) + + keypoints_out = tensor_outputs.get("keypoints_out", None) + kps_score = tensor_outputs.get("kps_score", None) + if keypoints_out is not None: + # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) + keypoints_tensor = keypoints_out + # NOTE: it's possible that prob is not calculated if "should_output_softmax" + # is set to False in HeatmapMaxKeypoint, so just using raw score, seems + # it doesn't affect mAP. TODO: check more carefully. 
+ keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] + result.pred_keypoints = keypoint_xyp + elif kps_score is not None: + # keypoint heatmap to sparse data structure + pred_keypoint_logits = kps_score + keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) + + return results + + +def _cast_to_f32(f64): + return struct.unpack("f", struct.pack("f", f64))[0] + + +def set_caffe2_compatible_tensor_mode(model, enable=True): + def _fn(m): + if isinstance(m, Caffe2Compatible): + m.tensor_mode = enable + + model.apply(_fn) + + +def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device): + """ + See get_caffe2_inputs() below. + """ + assert all(isinstance(x, dict) for x in batched_inputs) + assert all(x["image"].dim() == 3 for x in batched_inputs) + + images = [x["image"] for x in batched_inputs] + images = ImageList.from_tensors(images, size_divisibility) + + im_info = [] + for input_per_image, image_size in zip(batched_inputs, images.image_sizes): + target_height = input_per_image.get("height", image_size[0]) + target_width = input_per_image.get("width", image_size[1]) # noqa + # NOTE: The scale inside im_info is kept as convention and for providing + # post-processing information if further processing is needed. For + # current Caffe2 model definitions that don't include post-processing inside + # the model, this number is not used. + # NOTE: There can be a slight difference between width and height + # scales, using a single number can results in numerical difference + # compared with D2's post-processing. + scale = target_height / image_size[0] + im_info.append([image_size[0], image_size[1], scale]) + im_info = torch.Tensor(im_info) + + return images.tensor.to(device), im_info.to(device) + + +class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module): + """ + Base class for caffe2-compatible implementation of a meta architecture. + The forward is traceable and its traced graph can be converted to caffe2 + graph through ONNX. + """ + + def __init__(self, cfg, torch_model): + """ + Args: + cfg (CfgNode): + torch_model (nn.Module): the detectron2 model (meta_arch) to be + converted. + """ + super().__init__() + self._wrapped_model = torch_model + self.eval() + set_caffe2_compatible_tensor_mode(self, True) + + def get_caffe2_inputs(self, batched_inputs): + """ + Convert pytorch-style structured inputs to caffe2-style inputs that + are tuples of tensors. + + Args: + batched_inputs (list[dict]): inputs to a detectron2 model + in its standard format. Each dict has "image" (CHW tensor), and optionally + "height" and "width". + + Returns: + tuple[Tensor]: + tuple of tensors that will be the inputs to the + :meth:`forward` method. For existing models, the first + is an NCHW tensor (padded and batched); the second is + a im_info Nx3 tensor, where the rows are + (height, width, unused legacy parameter) + """ + return convert_batched_inputs_to_c2_format( + batched_inputs, + self._wrapped_model.backbone.size_divisibility, + self._wrapped_model.device, + ) + + def encode_additional_info(self, predict_net, init_net): + """ + Save extra metadata that will be used by inference in the output protobuf. + """ + pass + + def forward(self, inputs): + """ + Run the forward in caffe2-style. It has to use caffe2-compatible ops + and the method will be used for tracing. + + Args: + inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`. + They will be the inputs of the converted caffe2 graph. + + Returns: + tuple[Tensor]: output tensors. 
They will be the outputs of the + converted caffe2 graph. + """ + raise NotImplementedError + + def _caffe2_preprocess_image(self, inputs): + """ + Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward. + It normalizes the input images, and the final caffe2 graph assumes the + inputs have been batched already. + """ + data, im_info = inputs + data = alias(data, "data") + im_info = alias(im_info, "im_info") + mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std + normalized_data = (data - mean) / std + normalized_data = alias(normalized_data, "normalized_data") + + # Pack (data, im_info) into ImageList which is recognized by self.inference. + images = ImageList(tensor=normalized_data, image_sizes=im_info) + return images + + @staticmethod + def get_outputs_converter(predict_net, init_net): + """ + Creates a function that converts outputs of the caffe2 model to + detectron2's standard format. + The function uses information in `predict_net` and `init_net` that are + available at inferene time. Therefore the function logic can be used in inference. + + The returned function has the following signature: + + def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs + + Where + + * batched_inputs (list[dict]): the original input format of the meta arch + * c2_inputs (tuple[Tensor]): the caffe2 inputs. + * c2_results (dict[str, Tensor]): the caffe2 output format, + corresponding to the outputs of the :meth:`forward` function. + * detectron2_outputs: the original output format of the meta arch. + + This function can be used to compare the outputs of the original meta arch and + the converted caffe2 graph. + + Returns: + callable: a callable of the above signature. + """ + raise NotImplementedError + + +class Caffe2GeneralizedRCNN(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.GeneralizedRCNN) + torch_model = patch_generalized_rcnn(torch_model) + super().__init__(cfg, torch_model) + + self.roi_heads_patcher = ROIHeadsPatcher( + self._wrapped_model.roi_heads, cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT + ) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN") + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + if not self.tensor_mode: + return self._wrapped_model.inference(inputs) + images = self._caffe2_preprocess_image(inputs) + features = self._wrapped_model.backbone(images.tensor) + proposals, _ = self._wrapped_model.proposal_generator(images, features) + with self.roi_heads_patcher.mock_roi_heads(): + detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) + return tuple(detector_results[0].flatten()) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + results = assemble_rcnn_outputs_by_name(image_sizes, c2_results) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +class Caffe2PanopticFPN(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, 
meta_arch.PanopticFPN) + torch_model = patch_generalized_rcnn(torch_model) + super().__init__(cfg, torch_model) + + self.roi_heads_patcher = ROIHeadsPatcher( + self._wrapped_model.roi_heads, cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT + ) + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + assert self.tensor_mode + images = self._caffe2_preprocess_image(inputs) + features = self._wrapped_model.backbone(images.tensor) + + sem_seg_results, _ = self._wrapped_model.sem_seg_head(features) + sem_seg_results = alias(sem_seg_results, "sem_seg") + + proposals, _ = self._wrapped_model.proposal_generator(images, features) + + with self.roi_heads_patcher.mock_roi_heads(self.tensor_mode): + detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) + + return tuple(detector_results[0].flatten()) + (sem_seg_results,) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"PanopticFPN") + + # Inference parameters: + check_set_pb_arg( + predict_net, + "combine_overlap_threshold", + "f", + _cast_to_f32(self._wrapped_model.combine_overlap_thresh), + ) + check_set_pb_arg( + predict_net, + "combine_stuff_area_limit", + "i", + self._wrapped_model.combine_stuff_area_thresh, + ) + check_set_pb_arg( + predict_net, + "combine_instances_confidence_threshold", + "f", + _cast_to_f32(self._wrapped_model.combine_instances_score_thresh), + ) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + combine_overlap_threshold = get_pb_arg_valf(predict_net, "combine_overlap_threshold", None) + combine_stuff_area_limit = get_pb_arg_vali(predict_net, "combine_stuff_area_limit", None) + combine_instances_confidence_threshold = get_pb_arg_valf( + predict_net, "combine_instances_confidence_threshold", None + ) + + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + detector_results = assemble_rcnn_outputs_by_name( + image_sizes, c2_results, force_mask_on=True + ) + sem_seg_results = c2_results["sem_seg"] + + # copied from meta_arch/panoptic_fpn.py ... 
+ processed_results = [] + for sem_seg_result, detector_result, input_per_image, image_size in zip( + sem_seg_results, detector_results, batched_inputs, image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) + detector_r = detector_postprocess(detector_result, height, width) + + processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) + + panoptic_r = combine_semantic_and_instance_outputs( + detector_r, + sem_seg_r.argmax(dim=0), + combine_overlap_threshold, + combine_stuff_area_limit, + combine_instances_confidence_threshold, + ) + processed_results[-1]["panoptic_seg"] = panoptic_r + return processed_results + + return f + + +class Caffe2RetinaNet(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.RetinaNet) + super().__init__(cfg, torch_model) + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + assert self.tensor_mode + images = self._caffe2_preprocess_image(inputs) + + # explicitly return the images sizes to avoid removing "im_info" by ONNX + # since it's not used in the forward path + return_tensors = [images.image_sizes] + + features = self._wrapped_model.backbone(images.tensor) + features = [features[f] for f in self._wrapped_model.head_in_features] + for i, feature_i in enumerate(features): + features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True) + return_tensors.append(features[i]) + + pred_logits, pred_anchor_deltas = self._wrapped_model.head(features) + for i, (box_cls_i, box_delta_i) in enumerate(zip(pred_logits, pred_anchor_deltas)): + return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i))) + return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i))) + + return tuple(return_tensors) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet") + + # Inference parameters: + check_set_pb_arg( + predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.test_score_thresh) + ) + check_set_pb_arg( + predict_net, "topk_candidates", "i", self._wrapped_model.test_topk_candidates + ) + check_set_pb_arg( + predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.test_nms_thresh) + ) + check_set_pb_arg( + predict_net, + "max_detections_per_image", + "i", + self._wrapped_model.max_detections_per_image, + ) + + check_set_pb_arg( + predict_net, + "bbox_reg_weights", + "floats", + [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights], + ) + self._encode_anchor_generator_cfg(predict_net) + + def _encode_anchor_generator_cfg(self, predict_net): + # serialize anchor_generator for future use + serialized_anchor_generator = io.BytesIO() + torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator) + # Ideally we can put anchor generating inside the model, then we don't + # need to store this information. 
+ bytes = serialized_anchor_generator.getvalue() + check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + self = types.SimpleNamespace() + serialized_anchor_generator = io.BytesIO( + get_pb_arg_vals(predict_net, "serialized_anchor_generator", None) + ) + self.anchor_generator = torch.load(serialized_anchor_generator) + bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None) + self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights)) + self.test_score_thresh = get_pb_arg_valf(predict_net, "score_threshold", None) + self.test_topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None) + self.test_nms_thresh = get_pb_arg_valf(predict_net, "nms_threshold", None) + self.max_detections_per_image = get_pb_arg_vali( + predict_net, "max_detections_per_image", None + ) + + # hack to reuse inference code from RetinaNet + self.inference = functools.partial(meta_arch.RetinaNet.inference, self) + self.inference_single_image = functools.partial( + meta_arch.RetinaNet.inference_single_image, self + ) + + def f(batched_inputs, c2_inputs, c2_results): + _, im_info = c2_inputs + image_sizes = [[int(im[0]), int(im[1])] for im in im_info] + + num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")]) + pred_logits = [c2_results["box_cls_{}".format(i)] for i in range(num_features)] + pred_anchor_deltas = [c2_results["box_delta_{}".format(i)] for i in range(num_features)] + + # For each feature level, feature should have the same batch size and + # spatial dimension as the box_cls and box_delta. + dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits] + anchors = self.anchor_generator(dummy_features) + + # self.num_classess can be inferred + self.num_classes = pred_logits[0].shape[1] // (pred_anchor_deltas[0].shape[1] // 4) + + pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] + pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas] + + results = self.inference(anchors, pred_logits, pred_anchor_deltas, image_sizes) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +META_ARCH_CAFFE2_EXPORT_TYPE_MAP = { + "GeneralizedRCNN": Caffe2GeneralizedRCNN, + "PanopticFPN": Caffe2PanopticFPN, + "RetinaNet": Caffe2RetinaNet, +} diff --git a/src/sts/detectron2/export/caffe2_patch.py b/src/sts/detectron2/export/caffe2_patch.py new file mode 100644 index 0000000000000000000000000000000000000000..c9eee594a27cdec29ce5f2b6f7730171eda3805e --- /dev/null +++ b/src/sts/detectron2/export/caffe2_patch.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import contextlib +from unittest import mock +import torch + +from detectron2.modeling import poolers +from detectron2.modeling.proposal_generator import rpn +from detectron2.modeling.roi_heads import keypoint_head, mask_head +from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers + +from .c10 import ( + Caffe2Compatible, + Caffe2FastRCNNOutputsInference, + Caffe2KeypointRCNNInference, + Caffe2MaskRCNNInference, + Caffe2ROIPooler, + Caffe2RPN, +) + + +class GenericMixin(object): + pass + + +class Caffe2CompatibleConverter(object): + """ + A GenericUpdater which implements the `create_from` interface, by modifying + module object and assign it with another class replaceCls. 
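A hedged sketch of how this converter is meant to be used; it mirrors what `patch_generalized_rcnn()` (defined later in this file) does for RPN and ROIPooler. `model` is an assumed detectron2 model, e.g. one built as in the earlier sketch.

```python
from detectron2.export.c10 import Caffe2RPN
from detectron2.export.caffe2_patch import Caffe2CompatibleConverter, patch
from detectron2.modeling.proposal_generator import rpn

# Re-class every rpn.RPN submodule of `model` as the caffe2-compatible Caffe2RPN.
model = patch(model, rpn.RPN, Caffe2CompatibleConverter(Caffe2RPN))
```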
+ """ + + def __init__(self, replaceCls): + self.replaceCls = replaceCls + + def create_from(self, module): + # update module's class to the new class + assert isinstance(module, torch.nn.Module) + if issubclass(self.replaceCls, GenericMixin): + # replaceCls should act as mixin, create a new class on-the-fly + new_class = type( + "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), + (self.replaceCls, module.__class__), + {}, # {"new_method": lambda self: ...}, + ) + module.__class__ = new_class + else: + # replaceCls is complete class, this allow arbitrary class swap + module.__class__ = self.replaceCls + + # initialize Caffe2Compatible + if isinstance(module, Caffe2Compatible): + module.tensor_mode = False + + return module + + +def patch(model, target, updater, *args, **kwargs): + """ + recursively (post-order) update all modules with the target type and its + subclasses, make a initialization/composition/inheritance/... via the + updater.create_from. + """ + for name, module in model.named_children(): + model._modules[name] = patch(module, target, updater, *args, **kwargs) + if isinstance(model, target): + return updater.create_from(model, *args, **kwargs) + return model + + +def patch_generalized_rcnn(model): + ccc = Caffe2CompatibleConverter + model = patch(model, rpn.RPN, ccc(Caffe2RPN)) + model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) + + return model + + +@contextlib.contextmanager +def mock_fastrcnn_outputs_inference( + tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers +): + with mock.patch.object( + box_predictor_type, + "inference", + autospec=True, + side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): + with mock.patch( + "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): + with mock.patch( + "{}.keypoint_rcnn_inference".format(patched_module), + side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +class ROIHeadsPatcher: + def __init__(self, heads, use_heatmap_max_keypoint): + self.heads = heads + self.use_heatmap_max_keypoint = use_heatmap_max_keypoint + + @contextlib.contextmanager + def mock_roi_heads(self, tensor_mode=True): + """ + Patching several inference functions inside ROIHeads and its subclasses + + Args: + tensor_mode (bool): whether the inputs/outputs are caffe2's tensor + format or not. Default to True. + """ + # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` + # are called inside the same file as BaseXxxHead due to using mock.patch. 
+ kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ + mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ + + mock_ctx_managers = [ + mock_fastrcnn_outputs_inference( + tensor_mode=tensor_mode, + check=True, + box_predictor_type=type(self.heads.box_predictor), + ) + ] + if getattr(self.heads, "keypoint_on", False): + mock_ctx_managers += [ + mock_keypoint_rcnn_inference( + tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint + ) + ] + if getattr(self.heads, "mask_on", False): + mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] + + with contextlib.ExitStack() as stack: # python 3.3+ + for mgr in mock_ctx_managers: + stack.enter_context(mgr) + yield diff --git a/src/sts/detectron2/export/flatten.py b/src/sts/detectron2/export/flatten.py new file mode 100644 index 0000000000000000000000000000000000000000..b89c0f66897d01f1f04959ba6241e5b0fdbe56c6 --- /dev/null +++ b/src/sts/detectron2/export/flatten.py @@ -0,0 +1,327 @@ +import collections +from dataclasses import dataclass +from typing import Callable, List, Optional, Tuple +import torch +from torch import nn + +from detectron2.structures import Boxes, Instances +from detectron2.utils.registry import _convert_target_to_string, locate + +from .torchscript_patch import patch_builtin_len + + +@dataclass +class Schema: + """ + A Schema defines how to flatten a possibly hierarchical object into tuple of + primitive objects, so it can be used as inputs/outputs of PyTorch's tracing. + + PyTorch does not support tracing a function that produces rich output + structures (e.g. dict, Instances, Boxes). To trace such a function, we + flatten the rich object into tuple of tensors, and return this tuple of tensors + instead. Meanwhile, we also need to know how to "rebuild" the original object + from the flattened results, so we can evaluate the flattened results. + A Schema defines how to flatten an object, and while flattening it, it records + necessary schemas so that the object can be rebuilt using the flattened outputs. + + The flattened object and the schema object is returned by ``.flatten`` classmethod. + Then the original object can be rebuilt with the ``__call__`` method of schema. + + A Schema is a dataclass that can be serialized easily. + """ + + # inspired by FetchMapper in tensorflow/python/client/session.py + + @classmethod + def flatten(cls, obj): + raise NotImplementedError + + def __call__(self, values): + raise NotImplementedError + + @staticmethod + def _concat(values): + ret = () + sizes = [] + for v in values: + assert isinstance(v, tuple), "Flattened results must be a tuple" + ret = ret + v + sizes.append(len(v)) + return ret, sizes + + @staticmethod + def _split(values, sizes): + if len(sizes): + expected_len = sum(sizes) + assert ( + len(values) == expected_len + ), f"Values has length {len(values)} but expect length {expected_len}." + ret = [] + for k in range(len(sizes)): + begin, end = sum(sizes[:k]), sum(sizes[: k + 1]) + ret.append(values[begin:end]) + return ret + + +@dataclass +class ListSchema(Schema): + schemas: List[Schema] # the schemas that define how to flatten each element in the list + sizes: List[int] # the flattened length of each element + + def __call__(self, values): + values = self._split(values, self.sizes) + if len(values) != len(self.schemas): + raise ValueError( + f"Values has length {len(values)} but schemas " f"has length {len(self.schemas)}!" 
+ ) + values = [m(v) for m, v in zip(self.schemas, values)] + return list(values) + + @classmethod + def flatten(cls, obj): + res = [flatten_to_tuple(k) for k in obj] + values, sizes = cls._concat([k[0] for k in res]) + return values, cls([k[1] for k in res], sizes) + + +@dataclass +class TupleSchema(ListSchema): + def __call__(self, values): + return tuple(super().__call__(values)) + + +@dataclass +class IdentitySchema(Schema): + def __call__(self, values): + return values[0] + + @classmethod + def flatten(cls, obj): + return (obj,), cls() + + +@dataclass +class DictSchema(ListSchema): + keys: List[str] + + def __call__(self, values): + values = super().__call__(values) + return dict(zip(self.keys, values)) + + @classmethod + def flatten(cls, obj): + for k in obj.keys(): + if not isinstance(k, str): + raise KeyError("Only support flattening dictionaries if keys are str.") + keys = sorted(obj.keys()) + values = [obj[k] for k in keys] + ret, schema = ListSchema.flatten(values) + return ret, cls(schema.schemas, schema.sizes, keys) + + +@dataclass +class InstancesSchema(DictSchema): + def __call__(self, values): + image_size, fields = values[-1], values[:-1] + fields = super().__call__(fields) + return Instances(image_size, **fields) + + @classmethod + def flatten(cls, obj): + ret, schema = super().flatten(obj.get_fields()) + size = obj.image_size + if not isinstance(size, torch.Tensor): + size = torch.tensor(size) + return ret + (size,), schema + + +@dataclass +class TensorWrapSchema(Schema): + """ + For classes that are simple wrapper of tensors, e.g. + Boxes, RotatedBoxes, BitMasks + """ + + class_name: str + + def __call__(self, values): + return locate(self.class_name)(values[0]) + + @classmethod + def flatten(cls, obj): + return (obj.tensor,), cls(_convert_target_to_string(type(obj))) + + +# if more custom structures needed in the future, can allow +# passing in extra schemas for custom types +def flatten_to_tuple(obj): + """ + Flatten an object so it can be used for PyTorch tracing. + Also returns how to rebuild the original object from the flattened outputs. + + Returns: + res (tuple): the flattened results that can be used as tracing outputs + schema: an object with a ``__call__`` method such that ``schema(res) == obj``. + It is a pure dataclass that can be serialized. + """ + schemas = [ + ((str, bytes), IdentitySchema), + (list, ListSchema), + (tuple, TupleSchema), + (collections.abc.Mapping, DictSchema), + (Instances, InstancesSchema), + (Boxes, TensorWrapSchema), + ] + for klass, schema in schemas: + if isinstance(obj, klass): + F = schema + break + else: + F = IdentitySchema + + return F.flatten(obj) + + +class TracingAdapter(nn.Module): + """ + A model may take rich input/output format (e.g. dict or custom classes), + but `torch.jit.trace` requires tuple of tensors as input/output. + This adapter flattens input/output format of a model so it becomes traceable. + + It also records the necessary schema to rebuild model's inputs/outputs from flattened + inputs/outputs. 
+ + Example: + :: + outputs = model(inputs) # inputs/outputs may be rich structure + adapter = TracingAdapter(model, inputs) + + # can now trace the model, with adapter.flattened_inputs, or another + # tuple of tensors with the same length and meaning + traced = torch.jit.trace(adapter, adapter.flattened_inputs) + + # traced model can only produce flattened outputs (tuple of tensors) + flattened_outputs = traced(*adapter.flattened_inputs) + # adapter knows the schema to convert it back (new_outputs == outputs) + new_outputs = adapter.outputs_schema(flattened_outputs) + """ + + flattened_inputs: Tuple[torch.Tensor] = None + """ + Flattened version of inputs given to this class's constructor. + """ + + inputs_schema: Schema = None + """ + Schema of the inputs given to this class's constructor. + """ + + outputs_schema: Schema = None + """ + Schema of the output produced by calling the given model with inputs. + """ + + def __init__( + self, + model: nn.Module, + inputs, + inference_func: Optional[Callable] = None, + allow_non_tensor: bool = False, + ): + """ + Args: + model: an nn.Module + inputs: An input argument or a tuple of input arguments used to call model. + After flattening, it has to only consist of tensors. + inference_func: a callable that takes (model, *inputs), calls the + model with inputs, and return outputs. By default it + is ``lambda model, *inputs: model(*inputs)``. Can be override + if you need to call the model differently. + allow_non_tensor: allow inputs/outputs to contain non-tensor objects. + This option will filter out non-tensor objects to make the + model traceable, but ``inputs_schema``/``outputs_schema`` cannot be + used anymore because inputs/outputs cannot be rebuilt from pure tensors. + This is useful when you're only interested in the single trace of + execution (e.g. for flop count), but not interested in + generalizing the traced graph to new inputs. + """ + super().__init__() + if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): + model = model.module + self.model = model + if not isinstance(inputs, tuple): + inputs = (inputs,) + self.inputs = inputs + self.allow_non_tensor = allow_non_tensor + + if inference_func is None: + inference_func = lambda model, *inputs: model(*inputs) # noqa + self.inference_func = inference_func + + self.flattened_inputs, self.inputs_schema = flatten_to_tuple(inputs) + + if all(isinstance(x, torch.Tensor) for x in self.flattened_inputs): + return + if self.allow_non_tensor: + self.flattened_inputs = tuple( + [x for x in self.flattened_inputs if isinstance(x, torch.Tensor)] + ) + self.inputs_schema = None + else: + for input in self.flattened_inputs: + if not isinstance(input, torch.Tensor): + raise ValueError( + "Inputs for tracing must only contain tensors. " + f"Got a {type(input)} instead." + ) + + def forward(self, *args: torch.Tensor): + with torch.no_grad(), patch_builtin_len(): + if self.inputs_schema is not None: + inputs_orig_format = self.inputs_schema(args) + else: + if args != self.flattened_inputs: + raise ValueError( + "TracingAdapter does not contain valid inputs_schema." + " So it cannot generalize to other inputs and must be" + " traced with `.flattened_inputs`." 
+ ) + inputs_orig_format = self.inputs + + outputs = self.inference_func(self.model, *inputs_orig_format) + flattened_outputs, schema = flatten_to_tuple(outputs) + + flattened_output_tensors = tuple( + [x for x in flattened_outputs if isinstance(x, torch.Tensor)] + ) + if len(flattened_output_tensors) < len(flattened_outputs): + if self.allow_non_tensor: + flattened_outputs = flattened_output_tensors + self.outputs_schema = None + else: + raise ValueError( + "Model cannot be traced because some model outputs " + "cannot flatten to tensors." + ) + else: # schema is valid + if self.outputs_schema is None: + self.outputs_schema = schema + else: + assert self.outputs_schema == schema, ( + "Model should always return outputs with the same " + "structure so it can be traced!" + ) + return flattened_outputs + + def _create_wrapper(self, traced_model): + """ + Return a function that has an input/output interface the same as the + original model, but it calls the given traced model under the hood. + """ + + def forward(*args): + flattened_inputs, _ = flatten_to_tuple(args) + flattened_outputs = traced_model(*flattened_inputs) + return self.outputs_schema(flattened_outputs) + + return forward diff --git a/src/sts/detectron2/export/shared.py b/src/sts/detectron2/export/shared.py new file mode 100644 index 0000000000000000000000000000000000000000..857cc9711dc175835bd6cfa28f877f70063cb94f --- /dev/null +++ b/src/sts/detectron2/export/shared.py @@ -0,0 +1,1034 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import collections +import contextlib +import copy +import functools +import logging +import numpy as np +import os +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from unittest import mock +import caffe2.python.utils as putils +import torch +import torch.nn.functional as F +from caffe2.proto import caffe2_pb2 +from caffe2.python import core, net_drawer, workspace +from torch.nn.functional import interpolate as interp + +logger = logging.getLogger(__name__) + + +# ==== torch/utils_toffee/cast.py ======================================= + + +def to_device(t, device_str): + """ + This function is a replacement of .to(another_device) such that it allows the + casting to be traced properly by explicitly calling the underlying copy ops. + It also avoids introducing unncessary op when casting to the same device. 
+ """ + src = t.device + dst = torch.device(device_str) + + if src == dst: + return t + elif src.type == "cuda" and dst.type == "cpu": + return torch.ops._caffe2.CopyGPUToCPU(t) + elif src.type == "cpu" and dst.type == "cuda": + return torch.ops._caffe2.CopyCPUToGPU(t) + else: + raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst)) + + +# ==== torch/utils_toffee/interpolate.py ======================================= + + +# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py +def BilinearInterpolation(tensor_in, up_scale): + assert up_scale % 2 == 0, "Scale should be even" + + def upsample_filt(size): + factor = (size + 1) // 2 + if size % 2 == 1: + center = factor - 1 + else: + center = factor - 0.5 + + og = np.ogrid[:size, :size] + return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) + + kernel_size = int(up_scale) * 2 + bil_filt = upsample_filt(kernel_size) + + dim = int(tensor_in.shape[1]) + kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32) + kernel[range(dim), range(dim), :, :] = bil_filt + + tensor_out = F.conv_transpose2d( + tensor_in, + weight=to_device(torch.Tensor(kernel), tensor_in.device), + bias=None, + stride=int(up_scale), + padding=int(up_scale / 2), + ) + + return tensor_out + + +# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if +# using dynamic `scale_factor` rather than static `size`. (T43166860) +# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly. +def onnx_compatibale_interpolate( + input, size=None, scale_factor=None, mode="nearest", align_corners=None +): + # NOTE: The input dimensions are interpreted in the form: + # `mini-batch x channels x [optional depth] x [optional height] x width`. + if size is None and scale_factor is not None: + if input.dim() == 4: + if isinstance(scale_factor, (int, float)): + height_scale, width_scale = (scale_factor, scale_factor) + else: + assert isinstance(scale_factor, (tuple, list)) + assert len(scale_factor) == 2 + height_scale, width_scale = scale_factor + + assert not align_corners, "No matching C2 op for align_corners == True" + if mode == "nearest": + return torch.ops._caffe2.ResizeNearest( + input, order="NCHW", width_scale=width_scale, height_scale=height_scale + ) + elif mode == "bilinear": + logger.warning( + "Use F.conv_transpose2d for bilinear interpolate" + " because there's no such C2 op, this may cause significant" + " slowdown and the boundary pixels won't be as same as" + " using F.interpolate due to padding." 
+ ) + assert height_scale == width_scale + return BilinearInterpolation(input, up_scale=height_scale) + logger.warning("Output size is not static, it might cause ONNX conversion issue") + + return interp(input, size, scale_factor, mode, align_corners) + + +@contextlib.contextmanager +def mock_torch_nn_functional_interpolate(): + if torch.onnx.is_in_onnx_export(): + with mock.patch( + "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate + ): + yield + else: + yield + + +# ==== torch/utils_caffe2/ws_utils.py ========================================== + + +class ScopedWS(object): + def __init__(self, ws_name, is_reset, is_cleanup=False): + self.ws_name = ws_name + self.is_reset = is_reset + self.is_cleanup = is_cleanup + self.org_ws = "" + + def __enter__(self): + self.org_ws = workspace.CurrentWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.ws_name, True) + if self.is_reset: + workspace.ResetWorkspace() + + return workspace + + def __exit__(self, *args): + if self.is_cleanup: + workspace.ResetWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.org_ws) + + +def fetch_any_blob(name): + bb = None + try: + bb = workspace.FetchBlob(name) + except TypeError: + bb = workspace.FetchInt8Blob(name) + except Exception as e: + logger.error("Get blob {} error: {}".format(name, e)) + + return bb + + +# ==== torch/utils_caffe2/protobuf.py ========================================== + + +def get_pb_arg(pb, arg_name): + for x in pb.arg: + if x.name == arg_name: + return x + return None + + +def get_pb_arg_valf(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.f if arg is not None else default_val + + +def get_pb_arg_floats(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(float, arg.floats)) if arg is not None else default_val + + +def get_pb_arg_ints(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(int, arg.ints)) if arg is not None else default_val + + +def get_pb_arg_vali(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.i if arg is not None else default_val + + +def get_pb_arg_vals(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.s if arg is not None else default_val + + +def get_pb_arg_valstrings(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(arg.strings) if arg is not None else default_val + + +def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False): + arg = get_pb_arg(pb, arg_name) + if arg is None: + arg = putils.MakeArgument(arg_name, arg_value) + assert hasattr(arg, arg_attr) + pb.arg.extend([arg]) + if allow_override and getattr(arg, arg_attr) != arg_value: + logger.warning( + "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value) + ) + setattr(arg, arg_attr, arg_value) + else: + assert arg is not None + assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format( + getattr(arg, arg_attr), arg_value + ) + + +def _create_const_fill_op_from_numpy(name, tensor, device_option=None): + assert type(tensor) == np.ndarray + kTypeNameMapper = { + np.dtype("float32"): "GivenTensorFill", + np.dtype("int32"): "GivenTensorIntFill", + np.dtype("int64"): "GivenTensorInt64Fill", + np.dtype("uint8"): "GivenTensorStringFill", + } + + args_dict = {} + if tensor.dtype == np.dtype("uint8"): + args_dict.update({"values": [str(tensor.data)], "shape": [1]}) + else: + args_dict.update({"values": tensor, "shape": 
tensor.shape}) + + if device_option is not None: + args_dict["device_option"] = device_option + + return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict) + + +def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor): + assert type(int8_tensor) == workspace.Int8Tensor + kTypeNameMapper = { + np.dtype("int32"): "Int8GivenIntTensorFill", + np.dtype("uint8"): "Int8GivenTensorFill", + } + + tensor = int8_tensor.data + assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")] + values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor + + return core.CreateOperator( + kTypeNameMapper[tensor.dtype], + [], + [name], + values=values, + shape=tensor.shape, + Y_scale=int8_tensor.scale, + Y_zero_point=int8_tensor.zero_point, + ) + + +def create_const_fill_op( + name: str, + blob: Union[np.ndarray, workspace.Int8Tensor], + device_option: Optional[caffe2_pb2.DeviceOption] = None, +) -> caffe2_pb2.OperatorDef: + """ + Given a blob object, return the Caffe2 operator that creates this blob + as constant. Currently support NumPy tensor and Caffe2 Int8Tensor. + """ + + tensor_type = type(blob) + assert tensor_type in [ + np.ndarray, + workspace.Int8Tensor, + ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format( + name, type(blob) + ) + + if tensor_type == np.ndarray: + return _create_const_fill_op_from_numpy(name, blob, device_option) + elif tensor_type == workspace.Int8Tensor: + assert device_option is None + return _create_const_fill_op_from_c2_int8_tensor(name, blob) + + +def construct_init_net_from_params( + params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None +) -> caffe2_pb2.NetDef: + """ + Construct the init_net from params dictionary + """ + init_net = caffe2_pb2.NetDef() + device_options = device_options or {} + for name, blob in params.items(): + if isinstance(blob, str): + logger.warning( + ( + "Blob {} with type {} is not supported in generating init net," + " skipped.".format(name, type(blob)) + ) + ) + continue + init_net.op.extend( + [create_const_fill_op(name, blob, device_option=device_options.get(name, None))] + ) + init_net.external_output.append(name) + return init_net + + +def get_producer_map(ssa): + """ + Return dict from versioned blob to (i, j), + where i is index of producer op, j is the index of output of that op. + """ + producer_map = {} + for i in range(len(ssa)): + outputs = ssa[i][1] + for j, outp in enumerate(outputs): + producer_map[outp] = (i, j) + return producer_map + + +def get_consumer_map(ssa): + """ + Return dict from versioned blob to list of (i, j), + where i is index of consumer op, j is the index of input of that op. + """ + consumer_map = collections.defaultdict(list) + for i in range(len(ssa)): + inputs = ssa[i][0] + for j, inp in enumerate(inputs): + consumer_map[inp].append((i, j)) + return consumer_map + + +def get_params_from_init_net( + init_net: caffe2_pb2.NetDef, +) -> [Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]: + """ + Take the output blobs from init_net by running it. + Outputs: + params: dict from blob name to numpy array + device_options: dict from blob name to the device option of its creating op + """ + # NOTE: this assumes that the params is determined by producer op with the + # only exception be CopyGPUToCPU which is CUDA op but returns CPU tensor. 
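For intuition, `get_producer_map`/`get_consumer_map` above operate on the SSA form returned by `core.get_ssa`: one `(inputs, outputs)` pair of versioned blobs per op. A hand-written two-op SSA behaves as below; the maps are inlined here (mirroring the helpers above) so the snippet runs without Caffe2 installed:

```python
import collections

# Hand-written SSA for a 2-op net:  conv: (x0, w0) -> y0 ;  relu: (y0,) -> y1
ssa = [
    ([("x", 0), ("w", 0)], [("y", 0)]),   # op 0
    ([("y", 0)],           [("y", 1)]),   # op 1 (in-place style: bumps y's version)
]

# Same logic as get_producer_map / get_consumer_map above, inlined for the sketch.
producer = {outp: (i, j) for i, (_, outs) in enumerate(ssa) for j, outp in enumerate(outs)}
consumer = collections.defaultdict(list)
for i, (ins, _) in enumerate(ssa):
    for j, inp in enumerate(ins):
        consumer[inp].append((i, j))

print(producer[("y", 1)])   # (1, 0): op 1 produced the final version of y
print(consumer[("y", 0)])   # [(1, 0)]: op 1 consumes y version 0 as its first input
```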
+ def _get_device_option(producer_op): + if producer_op.type == "CopyGPUToCPU": + return caffe2_pb2.DeviceOption() + else: + return producer_op.device_option + + with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws: + ws.RunNetOnce(init_net) + params = {b: fetch_any_blob(b) for b in init_net.external_output} + ssa, versions = core.get_ssa(init_net) + producer_map = get_producer_map(ssa) + device_options = { + b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]]) + for b in init_net.external_output + } + return params, device_options + + +def _updater_raise(op, input_types, output_types): + raise RuntimeError( + "Failed to apply updater for op {} given input_types {} and" + " output_types {}".format(op, input_types, output_types) + ) + + +def _generic_status_identifier( + predict_net: caffe2_pb2.NetDef, + status_updater: Callable, + known_status: Dict[Tuple[str, int], Any], +) -> Dict[Tuple[str, int], Any]: + """ + Statically infer the status of each blob, the status can be such as device type + (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here + is versioned blob (Tuple[str, int]) in the format compatible with ssa. + Inputs: + predict_net: the caffe2 network + status_updater: a callable, given an op and the status of its input/output, + it returns the updated status of input/output. `None` is used for + representing unknown status. + known_status: a dict containing known status, used as initialization. + Outputs: + A dict mapping from versioned blob to its status + """ + ssa, versions = core.get_ssa(predict_net) + versioned_ext_input = [(b, 0) for b in predict_net.external_input] + versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output] + all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa]) + + allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output) + assert all(k in allowed_vbs for k in known_status) + assert all(v is not None for v in known_status.values()) + _known_status = copy.deepcopy(known_status) + + def _check_and_update(key, value): + assert value is not None + if key in _known_status: + if not _known_status[key] == value: + raise RuntimeError( + "Confilict status for {}, existing status {}, new status {}".format( + key, _known_status[key], value + ) + ) + _known_status[key] = value + + def _update_i(op, ssa_i): + versioned_inputs = ssa_i[0] + versioned_outputs = ssa_i[1] + + inputs_status = [_known_status.get(b, None) for b in versioned_inputs] + outputs_status = [_known_status.get(b, None) for b in versioned_outputs] + + new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status) + + for versioned_blob, status in zip( + versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status + ): + if status is not None: + _check_and_update(versioned_blob, status) + + for op, ssa_i in zip(predict_net.op, ssa): + _update_i(op, ssa_i) + for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)): + _update_i(op, ssa_i) + + # NOTE: This strictly checks all the blob from predict_net must be assgined + # a known status. However sometimes it's impossible (eg. having deadend op), + # we may relax this constraint if + for k in all_versioned_blobs: + if k not in _known_status: + raise NotImplementedError( + "Can not infer the status for {}. 
Currently only support the case where" + " a single forward and backward pass can identify status for all blobs.".format(k) + ) + + return _known_status + + +def infer_device_type( + predict_net: caffe2_pb2.NetDef, + known_status: Dict[Tuple[str, int], Any], + device_name_style: str = "caffe2", +) -> Dict[Tuple[str, int], str]: + """ Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob """ + + assert device_name_style in ["caffe2", "pytorch"] + _CPU_STR = "cpu" + _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda" + + def _copy_cpu_to_gpu_updater(op, input_types, output_types): + if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR: + _updater_raise(op, input_types, output_types) + return ([_CPU_STR], [_GPU_STR]) + + def _copy_gpu_to_cpu_updater(op, input_types, output_types): + if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR: + _updater_raise(op, input_types, output_types) + return ([_GPU_STR], [_CPU_STR]) + + def _other_ops_updater(op, input_types, output_types): + non_none_types = [x for x in input_types + output_types if x is not None] + if len(non_none_types) > 0: + the_type = non_none_types[0] + if not all(x == the_type for x in non_none_types): + _updater_raise(op, input_types, output_types) + else: + the_type = None + return ([the_type for _ in op.input], [the_type for _ in op.output]) + + def _device_updater(op, *args, **kwargs): + return { + "CopyCPUToGPU": _copy_cpu_to_gpu_updater, + "CopyGPUToCPU": _copy_gpu_to_cpu_updater, + }.get(op.type, _other_ops_updater)(op, *args, **kwargs) + + return _generic_status_identifier(predict_net, _device_updater, known_status) + + +# ==== torch/utils_caffe2/vis.py =============================================== + + +def _modify_blob_names(ops, blob_rename_f): + ret = [] + + def _replace_list(blob_list, replaced_list): + del blob_list[:] + blob_list.extend(replaced_list) + + for x in ops: + cur = copy.deepcopy(x) + _replace_list(cur.input, list(map(blob_rename_f, cur.input))) + _replace_list(cur.output, list(map(blob_rename_f, cur.output))) + ret.append(cur) + + return ret + + +def _rename_blob(name, blob_sizes, blob_ranges): + def _list_to_str(bsize): + ret = ", ".join([str(x) for x in bsize]) + ret = "[" + ret + "]" + return ret + + ret = name + if blob_sizes is not None and name in blob_sizes: + ret += "\n" + _list_to_str(blob_sizes[name]) + if blob_ranges is not None and name in blob_ranges: + ret += "\n" + _list_to_str(blob_ranges[name]) + + return ret + + +# graph_name could not contain word 'graph' +def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None): + blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges) + return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f) + + +def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None): + graph = None + ops = net.op + if blob_rename_func is not None: + ops = _modify_blob_names(ops, blob_rename_func) + if not op_only: + graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB") + else: + graph = net_drawer.GetPydotGraphMinimal( + ops, graph_name, rankdir="TB", minimal_dependency=True + ) + + try: + par_dir = os.path.dirname(file_name) + if not os.path.exists(par_dir): + os.makedirs(par_dir) + + format = os.path.splitext(os.path.basename(file_name))[-1] + if format == ".png": + graph.write_png(file_name) + elif format == ".pdf": + graph.write_pdf(file_name) + elif format == ".svg": + 
graph.write_svg(file_name) + else: + print("Incorrect format {}".format(format)) + except Exception as e: + print("Error when writing graph to image {}".format(e)) + + return graph + + +# ==== torch/utils_toffee/aten_to_caffe2.py ==================================== + + +def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef): + """ + For ONNX exported model, GroupNorm will be represented as ATen op, + this can be a drop in replacement from ATen to GroupNorm + """ + count = 0 + for op in predict_net.op: + if op.type == "ATen": + op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3 + if op_name and op_name.decode() == "group_norm": + op.arg.remove(get_pb_arg(op, "operator")) + + if get_pb_arg_vali(op, "cudnn_enabled", None): + op.arg.remove(get_pb_arg(op, "cudnn_enabled")) + + num_groups = get_pb_arg_vali(op, "num_groups", None) + if num_groups is not None: + op.arg.remove(get_pb_arg(op, "num_groups")) + check_set_pb_arg(op, "group", "i", num_groups) + + op.type = "GroupNorm" + count += 1 + if count > 1: + logger.info("Replaced {} ATen operator to GroupNormOp".format(count)) + + +# ==== torch/utils_toffee/alias.py ============================================= + + +def alias(x, name, is_backward=False): + if not torch.onnx.is_in_onnx_export(): + return x + assert isinstance(x, torch.Tensor) + return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward) + + +def fuse_alias_placeholder(predict_net, init_net): + """ Remove AliasWithName placeholder and rename the input/output of it """ + # First we finish all the re-naming + for i, op in enumerate(predict_net.op): + if op.type == "AliasWithName": + assert len(op.input) == 1 + assert len(op.output) == 1 + name = get_pb_arg_vals(op, "name", None).decode() + is_backward = bool(get_pb_arg_vali(op, "is_backward", 0)) + rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward) + rename_op_output(predict_net, i, 0, name) + + # Remove AliasWithName, should be very safe since it's a non-op + new_ops = [] + for op in predict_net.op: + if op.type != "AliasWithName": + new_ops.append(op) + else: + # safety check + assert op.input == op.output + assert op.input[0] == op.arg[0].s.decode() + del predict_net.op[:] + predict_net.op.extend(new_ops) + + +# ==== torch/utils_caffe2/graph_transform.py =================================== + + +class IllegalGraphTransformError(ValueError): + """ When a graph transform function call can't be executed. 
""" + + +def _rename_versioned_blob_in_proto( + proto: caffe2_pb2.NetDef, + old_name: str, + new_name: str, + version: int, + ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]], + start_versions: Dict[str, int], + end_versions: Dict[str, int], +): + """ In given proto, rename all blobs with matched version """ + # Operater list + for op, i_th_ssa in zip(proto.op, ssa): + versioned_inputs, versioned_outputs = i_th_ssa + for i in range(len(op.input)): + if versioned_inputs[i] == (old_name, version): + op.input[i] = new_name + for i in range(len(op.output)): + if versioned_outputs[i] == (old_name, version): + op.output[i] = new_name + # external_input + if start_versions.get(old_name, 0) == version: + for i in range(len(proto.external_input)): + if proto.external_input[i] == old_name: + proto.external_input[i] = new_name + # external_output + if end_versions.get(old_name, 0) == version: + for i in range(len(proto.external_output)): + if proto.external_output[i] == old_name: + proto.external_output[i] = new_name + + +def rename_op_input( + predict_net: caffe2_pb2.NetDef, + init_net: caffe2_pb2.NetDef, + op_id: int, + input_id: int, + new_name: str, + from_producer: bool = False, +): + """ + Rename the op_id-th operator in predict_net, change it's input_id-th input's + name to the new_name. It also does automatic re-route and change + external_input and init_net if necessary. + - It requires the input is only consumed by this op. + - This function modifies predict_net and init_net in-place. + - When from_producer is enable, this also updates other operators that consumes + the same input. Be cautious because may trigger unintended behavior. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + + init_net_ssa, init_net_versions = core.get_ssa(init_net) + predict_net_ssa, predict_net_versions = core.get_ssa( + predict_net, copy.deepcopy(init_net_versions) + ) + + versioned_inputs, versioned_outputs = predict_net_ssa[op_id] + old_name, version = versioned_inputs[input_id] + + if from_producer: + producer_map = get_producer_map(predict_net_ssa) + if not (old_name, version) in producer_map: + raise NotImplementedError( + "Can't find producer, the input {} is probably from" + " init_net, this is not supported yet.".format(old_name) + ) + producer = producer_map[(old_name, version)] + rename_op_output(predict_net, producer[0], producer[1], new_name) + return + + def contain_targets(op_ssa): + return (old_name, version) in op_ssa[0] + + is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa] + if sum(is_consumer) > 1: + raise IllegalGraphTransformError( + ( + "Input '{}' of operator(#{}) are consumed by other ops, please use" + + " rename_op_output on the producer instead. Offending op: \n{}" + ).format(old_name, op_id, predict_net.op[op_id]) + ) + + # update init_net + _rename_versioned_blob_in_proto( + init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions + ) + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, + old_name, + new_name, + version, + predict_net_ssa, + init_net_versions, + predict_net_versions, + ) + + +def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str): + """ + Rename the op_id-th operator in predict_net, change it's output_id-th input's + name to the new_name. It also does automatic re-route and change + external_output and if necessary. + - It allows multiple consumers of its output. 
+ - This function modifies predict_net in-place, doesn't need init_net. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + + ssa, blob_versions = core.get_ssa(predict_net) + + versioned_inputs, versioned_outputs = ssa[op_id] + old_name, version = versioned_outputs[output_id] + + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, old_name, new_name, version, ssa, {}, blob_versions + ) + + +def get_sub_graph_external_input_output( + predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int] +) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]: + """ + Return the list of external input/output of sub-graph, + each element is tuple of the name and corresponding version in predict_net. + + external input/output is defined the same way as caffe2 NetDef. + """ + ssa, versions = core.get_ssa(predict_net) + + all_inputs = [] + all_outputs = [] + for op_id in sub_graph_op_indices: + all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs] + all_outputs += list(ssa[op_id][1]) # ssa output won't repeat + + # for versioned blobs, external inputs are just those blob in all_inputs + # but not in all_outputs + ext_inputs = [inp for inp in all_inputs if inp not in all_outputs] + + # external outputs are essentially outputs of this subgraph that are used + # outside of this sub-graph (including predict_net.external_output) + all_other_inputs = sum( + (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices), + [(outp, versions[outp]) for outp in predict_net.external_output], + ) + ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)] + + return ext_inputs, ext_outputs + + +class DiGraph: + """ A DAG representation of caffe2 graph, each vertice is a versioned blob. """ + + def __init__(self): + self.vertices = set() + self.graph = collections.defaultdict(list) + + def add_edge(self, u, v): + self.graph[u].append(v) + self.vertices.add(u) + self.vertices.add(v) + + # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/ + def get_all_paths(self, s, d): + visited = {k: False for k in self.vertices} + path = [] + all_paths = [] + + def _get_all_paths_util(graph, u, d, visited, path): + visited[u] = True + path.append(u) + if u == d: + all_paths.append(copy.deepcopy(path)) + else: + for i in graph[u]: + if not visited[i]: + _get_all_paths_util(graph, i, d, visited, path) + path.pop() + visited[u] = False + + _get_all_paths_util(self.graph, s, d, visited, path) + return all_paths + + @staticmethod + def from_ssa(ssa): + graph = DiGraph() + for op_id in range(len(ssa)): + for inp in ssa[op_id][0]: + for outp in ssa[op_id][1]: + graph.add_edge(inp, outp) + return graph + + +def _get_dependency_chain(ssa, versioned_target, versioned_source): + """ + Return the index list of relevant operator to produce target blob from source blob, + if there's no dependency, return empty list. + """ + + # finding all paths between nodes can be O(N!), thus we can only search + # in the subgraph using the op starting from the first consumer of source blob + # to the producer of the target blob. 
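For reference, the backtracking DFS used by `DiGraph.get_all_paths` looks like this on a four-node toy graph (a plain-Python re-statement for illustration only):

```python
import collections
import copy

# Toy DAG:  a -> b -> d   and   a -> c -> d
graph = collections.defaultdict(list, {"a": ["b", "c"], "b": ["d"], "c": ["d"]})


def all_paths(s, d, visited=None, path=None, out=None):
    visited = visited if visited is not None else set()
    path = path if path is not None else []
    out = out if out is not None else []
    visited.add(s)
    path.append(s)
    if s == d:
        out.append(copy.deepcopy(path))       # record a complete path
    else:
        for nxt in graph[s]:
            if nxt not in visited:
                all_paths(nxt, d, visited, path, out)
    path.pop()                                # backtrack
    visited.discard(s)
    return out


print(all_paths("a", "d"))   # [['a', 'b', 'd'], ['a', 'c', 'd']]
```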
+ consumer_map = get_consumer_map(ssa) + producer_map = get_producer_map(ssa) + start_op = min(x[0] for x in consumer_map[versioned_source]) - 15 + end_op = ( + producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op + ) + sub_graph_ssa = ssa[start_op : end_op + 1] + if len(sub_graph_ssa) > 30: + logger.warning( + "Subgraph bebetween {} and {} is large (from op#{} to op#{}), it" + " might take non-trival time to find all paths between them.".format( + versioned_source, versioned_target, start_op, end_op + ) + ) + + dag = DiGraph.from_ssa(sub_graph_ssa) + paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends + ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths] + return sorted(set().union(*[set(ops) for ops in ops_in_paths])) + + +def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]: + """ + Idenfity the reshape sub-graph in a protobuf. + The reshape sub-graph is defined as matching the following pattern: + + (input_blob) -> Op_1 -> ... -> Op_N -> (new_shape) -─┐ + └-------------------------------------------> Reshape -> (output_blob) + + Return: + List of sub-graphs, each sub-graph is represented as a list of indices + of the relavent ops, [Op_1, Op_2, ..., Op_N, Reshape] + """ + + ssa, _ = core.get_ssa(predict_net) + + ret = [] + for i, op in enumerate(predict_net.op): + if op.type == "Reshape": + assert len(op.input) == 2 + input_ssa = ssa[i][0] + data_source = input_ssa[0] + shape_source = input_ssa[1] + op_indices = _get_dependency_chain(ssa, shape_source, data_source) + ret.append(op_indices + [i]) + return ret + + +def remove_reshape_for_fc(predict_net, params): + """ + In PyTorch nn.Linear has to take 2D tensor, this often leads to reshape + a 4D tensor to 2D by calling .view(). However this (dynamic) reshaping + doesn't work well with ONNX and Int8 tools, and cause using extra + ops (eg. ExpandDims) that might not be available on mobile. + Luckily Caffe2 supports 4D tensor for FC, so we can remove those reshape + after exporting ONNX model. + """ + from caffe2.python import core + + # find all reshape sub-graph that can be removed, which is now all Reshape + # sub-graph whose output is only consumed by FC. + # TODO: to make it safer, we may need the actually value to better determine + # if a Reshape before FC is removable. + reshape_sub_graphs = identify_reshape_sub_graph(predict_net) + sub_graphs_to_remove = [] + for reshape_sub_graph in reshape_sub_graphs: + reshape_op_id = reshape_sub_graph[-1] + assert predict_net.op[reshape_op_id].type == "Reshape" + ssa, _ = core.get_ssa(predict_net) + reshape_output = ssa[reshape_op_id][1][0] + consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]] + if all(predict_net.op[consumer].type == "FC" for consumer in consumers): + # safety check if the sub-graph is isolated, for this reshape sub-graph, + # it means it has one non-param external input and one external output. + ext_inputs, ext_outputs = get_sub_graph_external_input_output( + predict_net, reshape_sub_graph + ) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1: + sub_graphs_to_remove.append(reshape_sub_graph) + + # perform removing subgraph by: + # 1: rename the Reshape's output to its input, then the graph can be + # seen as in-place itentify, meaning whose external input/output are the same. + # 2: simply remove those ops. 
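A tiny hand-built net matching the documented `(input) -> Op_1 -> (new_shape) -> Reshape` pattern can be used to sanity-check `identify_reshape_sub_graph`. This sketch assumes Caffe2 and this module are importable; the blob names are made up:

```python
from caffe2.proto import caffe2_pb2
from caffe2.python import core

from detectron2.export.shared import identify_reshape_sub_graph

net = caffe2_pb2.NetDef()
net.op.extend([
    core.CreateOperator("Shape",   ["x"],              ["new_shape"]),           # Op_1
    core.CreateOperator("Reshape", ["x", "new_shape"], ["x_2d", "old_shape"]),   # Reshape
    core.CreateOperator("FC",      ["x_2d", "w", "b"], ["y"]),
])
net.external_input.extend(["x", "w", "b"])
net.external_output.append("y")

# Expect one sub-graph covering ops 0 (Shape) and 1 (Reshape): [[0, 1]]
print(identify_reshape_sub_graph(net))
```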
+ remove_op_ids = [] + params_to_remove = [] + for sub_graph in sub_graphs_to_remove: + logger.info( + "Remove Reshape sub-graph:\n{}".format( + "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph]) + ) + ) + reshape_op_id = sub_graph[-1] + new_reshap_output = predict_net.op[reshape_op_id].input[0] + rename_op_output(predict_net, reshape_op_id, 0, new_reshap_output) + ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0] + assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1 + assert ext_outputs[0][0] == non_params_ext_inputs[0][0] + assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1 + remove_op_ids.extend(sub_graph) + params_to_remove.extend(params_ext_inputs) + + predict_net = copy.deepcopy(predict_net) + new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids] + del predict_net.op[:] + predict_net.op.extend(new_ops) + for versioned_params in params_to_remove: + name = versioned_params[0] + logger.info("Remove params: {} from init_net and predict_net.external_input".format(name)) + del params[name] + predict_net.external_input.remove(name) + + return predict_net, params + + +def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef): + """ + In-place fuse extra copy ops between cpu/gpu for the following case: + a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1 + -CopyBToA> c2 -NextOp2-> d2 + The fused network will look like: + a -NextOp1-> d1 + -NextOp2-> d2 + """ + + _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"] + + def _fuse_once(predict_net): + ssa, blob_versions = core.get_ssa(predict_net) + consumer_map = get_consumer_map(ssa) + versioned_external_output = [ + (name, blob_versions[name]) for name in predict_net.external_output + ] + + for op_id, op in enumerate(predict_net.op): + if op.type in _COPY_OPS: + fw_copy_versioned_output = ssa[op_id][1][0] + consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]] + reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)] + + is_fusable = ( + len(consumer_ids) > 0 + and fw_copy_versioned_output not in versioned_external_output + and all( + predict_net.op[_op_id].type == reverse_op_type + and ssa[_op_id][1][0] not in versioned_external_output + for _op_id in consumer_ids + ) + ) + + if is_fusable: + for rv_copy_op_id in consumer_ids: + # making each NextOp uses "a" directly and removing Copy ops + rs_copy_versioned_output = ssa[rv_copy_op_id][1][0] + next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0] + predict_net.op[next_op_id].input[inp_id] = op.input[0] + # remove CopyOps + new_ops = [ + op + for i, op in enumerate(predict_net.op) + if i != op_id and i not in consumer_ids + ] + del predict_net.op[:] + predict_net.op.extend(new_ops) + return True + + return False + + # _fuse_once returns False is nothing can be fused + while _fuse_once(predict_net): + pass + + +def remove_dead_end_ops(net_def: caffe2_pb2.NetDef): + """ remove ops if its output is not used or not in external_output """ + ssa, versions = core.get_ssa(net_def) + versioned_external_output = [(name, versions[name]) for name in net_def.external_output] + consumer_map = get_consumer_map(ssa) + removed_op_ids = set() + + def _is_dead_end(versioned_blob): + return not ( + versioned_blob in versioned_external_output + or ( + len(consumer_map[versioned_blob]) > 0 + and all(x[0] not in removed_op_ids for x in 
consumer_map[versioned_blob]) + ) + ) + + for i, ssa_i in reversed(list(enumerate(ssa))): + versioned_outputs = ssa_i[1] + if all(_is_dead_end(outp) for outp in versioned_outputs): + removed_op_ids.add(i) + + # simply removing those deadend ops should have no effect to external_output + new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids] + del net_def.op[:] + net_def.op.extend(new_ops) diff --git a/src/sts/detectron2/export/torchscript.py b/src/sts/detectron2/export/torchscript.py new file mode 100644 index 0000000000000000000000000000000000000000..7939ae51fff9c4f9f55f48c1e3c69b70106dbdea --- /dev/null +++ b/src/sts/detectron2/export/torchscript.py @@ -0,0 +1,127 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import os +import torch + +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.file_io import PathManager + +from .torchscript_patch import freeze_training_mode, patch_instances + +__all__ = ["scripting_with_instances", "dump_torchscript_IR"] + + +def scripting_with_instances(model, fields): + """ + Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since + attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult + for scripting to support it out of the box. This function is made to support scripting + a model that uses :class:`Instances`. It does the following: + + 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, + but with all attributes been "static". + The attributes need to be statically declared in the ``fields`` argument. + 2. Register ``new_Instances``, and force scripting compiler to + use it when trying to compile ``Instances``. + + After this function, the process will be reverted. User should be able to script another model + using different fields. + + Example: + Assume that ``Instances`` in the model consist of two attributes named + ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and + :class:`Tensor` respectively during inference. You can call this function like: + :: + fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} + torchscipt_model = scripting_with_instances(model, fields) + + Note: + It only support models in evaluation mode. + + Args: + model (nn.Module): The input model to be exported by scripting. + fields (Dict[str, type]): Attribute names and corresponding type that + ``Instances`` will use in the model. Note that all attributes used in ``Instances`` + need to be added, regardless of whether they are inputs/outputs of the model. + Data type not defined in detectron2 is not supported for now. + + Returns: + torch.jit.ScriptModule: the model in torchscript format + """ + assert TORCH_VERSION >= (1, 8), "This feature is not available in PyTorch < 1.8" + assert ( + not model.training + ), "Currently we only support exporting models in evaluation mode to torchscript" + + with freeze_training_mode(model), patch_instances(fields): + scripted_model = torch.jit.script(model) + return scripted_model + + +# alias for old name +export_torchscript_with_instances = scripting_with_instances + + +def dump_torchscript_IR(model, dir): + """ + Dump IR of a TracedModule/ScriptModule at various levels. + Useful for debugging. + + Args: + model (TracedModule or ScriptModule): traced or scripted module + dir (str): output directory to dump files. 
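For example, the IR of a traced toy module can be dumped like this; the module and the output directory are arbitrary, the function only needs a TracedModule/ScriptModule and a writable path:

```python
import torch
from torch import nn

from detectron2.export.torchscript import dump_torchscript_IR

model = nn.Sequential(nn.Linear(8, 4), nn.ReLU()).eval()
traced = torch.jit.trace(model, torch.randn(1, 8))

# Writes model_ts_code.txt, model_ts_IR.txt, model_ts_IR_inlined.txt and model.txt
dump_torchscript_IR(traced, "/tmp/ts_dump")
```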
+ """ + # TODO: support ScriptFunction as well + PathManager.mkdirs(dir) + + def _get_script_mod(mod): + if isinstance(mod, torch.jit.TracedModule): + return mod._actual_script_module + return mod + + # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code + with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f: + + def get_code(mod): + # Try a few ways to get code using private attributes. + try: + # This contains more information than just `mod.code` + return _get_script_mod(mod)._c.code + except AttributeError: + pass + try: + return mod.code + except AttributeError: + return None + + def dump_code(prefix, mod): + code = get_code(mod) + name = prefix or "root model" + if code is None: + f.write(f"Could not found code for {name} (type={mod.original_name})\n") + f.write("\n") + else: + f.write(f"\nCode for {name}, type={mod.original_name}:\n") + f.write(code) + f.write("\n") + f.write("-" * 80) + + for name, m in mod.named_children(): + dump_code(prefix + "." + name, m) + + dump_code("", model) + + # Recursively dump IR of all modules + with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f: + try: + f.write(_get_script_mod(model)._c.dump_to_str(True, False, False)) + except AttributeError: + pass + + # Dump IR of the entire graph (all submodules inlined) + with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f: + f.write(str(model.inlined_graph)) + + # Dump the model structure in pytorch style + with PathManager.open(os.path.join(dir, "model.txt"), "w") as f: + f.write(str(model)) diff --git a/src/sts/detectron2/export/torchscript_patch.py b/src/sts/detectron2/export/torchscript_patch.py new file mode 100644 index 0000000000000000000000000000000000000000..618e7e0c4bd58e4fc1dc7c3d0e69c1b2ae73089e --- /dev/null +++ b/src/sts/detectron2/export/torchscript_patch.py @@ -0,0 +1,377 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import os +import sys +import tempfile +from contextlib import ExitStack, contextmanager +from copy import deepcopy +from unittest import mock +import torch +from torch import nn + +# need some explicit imports due to https://github.com/pytorch/pytorch/issues/38964 +import detectron2 # noqa F401 +from detectron2.structures import Boxes, Instances +from detectron2.utils.env import _import_file + +_counter = 0 + + +def _clear_jit_cache(): + from torch.jit._recursive import concrete_type_store + from torch.jit._state import _jit_caching_layer + + concrete_type_store.type_store.clear() # for modules + _jit_caching_layer.clear() # for free functions + + +def _add_instances_conversion_methods(newInstances): + """ + Add from_instances methods to the scripted Instances class. + """ + cls_name = newInstances.__name__ + + @torch.jit.unused + def from_instances(instances: Instances): + """ + Create scripted Instances from original Instances + """ + fields = instances.get_fields() + image_size = instances.image_size + ret = newInstances(image_size) + for name, val in fields.items(): + assert hasattr(ret, f"_{name}"), f"No attribute named {name} in {cls_name}" + setattr(ret, name, deepcopy(val)) + return ret + + newInstances.from_instances = from_instances + + +@contextmanager +def patch_instances(fields): + """ + A contextmanager, under which the Instances class in detectron2 is replaced + by a statically-typed scriptable class, defined by `fields`. + See more in `scripting_with_instances`. 
+ """ + + with tempfile.TemporaryDirectory(prefix="detectron2") as dir, tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", suffix=".py", dir=dir, delete=False + ) as f: + try: + # Objects that use Instances should not reuse previously-compiled + # results in cache, because `Instances` could be a new class each time. + _clear_jit_cache() + + cls_name, s = _gen_instance_module(fields) + f.write(s) + f.flush() + f.close() + + module = _import(f.name) + new_instances = getattr(module, cls_name) + _ = torch.jit.script(new_instances) + # let torchscript think Instances was scripted already + Instances.__torch_script_class__ = True + # let torchscript find new_instances when looking for the jit type of Instances + Instances._jit_override_qualname = torch._jit_internal._qualified_name(new_instances) + + _add_instances_conversion_methods(new_instances) + yield new_instances + finally: + try: + del Instances.__torch_script_class__ + del Instances._jit_override_qualname + except AttributeError: + pass + sys.modules.pop(module.__name__) + + +def _gen_instance_class(fields): + """ + Args: + fields (dict[name: type]) + """ + + class _FieldType: + def __init__(self, name, type_): + assert isinstance(name, str), f"Field name must be str, got {name}" + self.name = name + self.type_ = type_ + self.annotation = f"{type_.__module__}.{type_.__name__}" + + fields = [_FieldType(k, v) for k, v in fields.items()] + + def indent(level, s): + return " " * 4 * level + s + + lines = [] + + global _counter + _counter += 1 + + cls_name = "ScriptedInstances{}".format(_counter) + + field_names = tuple(x.name for x in fields) + lines.append( + f""" +class {cls_name}: + def __init__(self, image_size: Tuple[int, int]): + self.image_size = image_size + self._field_names = {field_names} +""" + ) + + for f in fields: + lines.append( + indent(2, f"self._{f.name} = torch.jit.annotate(Optional[{f.annotation}], None)") + ) + + for f in fields: + lines.append( + f""" + @property + def {f.name}(self) -> {f.annotation}: + # has to use a local for type refinement + # https://pytorch.org/docs/stable/jit_language_reference.html#optional-type-refinement + t = self._{f.name} + assert t is not None + return t + + @{f.name}.setter + def {f.name}(self, value: {f.annotation}) -> None: + self._{f.name} = value +""" + ) + + # support method `__len__` + lines.append( + """ + def __len__(self) -> int: +""" + ) + for f in fields: + lines.append( + f""" + t = self._{f.name} + if t is not None: + return len(t) +""" + ) + lines.append( + """ + raise NotImplementedError("Empty Instances does not support __len__!") +""" + ) + + # support method `has` + lines.append( + """ + def has(self, name: str) -> bool: +""" + ) + for f in fields: + lines.append( + f""" + if name == "{f.name}": + return self._{f.name} is not None +""" + ) + lines.append( + """ + return False +""" + ) + + # support method `to` + lines.append( + f""" + def to(self, device: torch.device) -> "{cls_name}": + ret = {cls_name}(self.image_size) +""" + ) + for f in fields: + if hasattr(f.type_, "to"): + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret._{f.name} = t.to(device) +""" + ) + else: + # For now, ignore fields that cannot be moved to devices. + # Maybe can support other tensor-like classes (e.g. 
__torch_function__) + pass + lines.append( + """ + return ret +""" + ) + + # support method `getitem` + lines.append( + f""" + def __getitem__(self, item) -> "{cls_name}": + ret = {cls_name}(self.image_size) +""" + ) + for f in fields: + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret._{f.name} = t[item] +""" + ) + lines.append( + """ + return ret +""" + ) + + # support method `get_fields()` + lines.append( + """ + def get_fields(self) -> Dict[str, Tensor]: + ret = {} + """ + ) + for f in fields: + if f.type_ == Boxes: + stmt = "t.tensor" + elif f.type_ == torch.Tensor: + stmt = "t" + else: + stmt = f'assert False, "unsupported type {str(f.type_)}"' + lines.append( + f""" + t = self._{f.name} + if t is not None: + ret["{f.name}"] = {stmt} + """ + ) + lines.append( + """ + return ret""" + ) + return cls_name, os.linesep.join(lines) + + +def _gen_instance_module(fields): + # TODO: find a more automatic way to enable import of other classes + s = """ +from copy import deepcopy +import torch +from torch import Tensor +import typing +from typing import * + +import detectron2 +from detectron2.structures import Boxes, Instances + +""" + + cls_name, cls_def = _gen_instance_class(fields) + s += cls_def + return cls_name, s + + +def _import(path): + return _import_file( + "{}{}".format(sys.modules[__name__].__name__, _counter), path, make_importable=True + ) + + +@contextmanager +def patch_builtin_len(modules=()): + """ + Patch the builtin len() function of a few detectron2 modules + to use __len__ instead, because __len__ does not convert values to + integers and therefore is friendly to tracing. + + Args: + modules (list[stsr]): names of extra modules to patch len(), in + addition to those in detectron2. + """ + + def _new_len(obj): + return obj.__len__() + + with ExitStack() as stack: + MODULES = [ + "detectron2.modeling.roi_heads.fast_rcnn", + "detectron2.modeling.roi_heads.mask_head", + "detectron2.modeling.roi_heads.keypoint_head", + ] + list(modules) + ctxs = [stack.enter_context(mock.patch(mod + ".len")) for mod in MODULES] + for m in ctxs: + m.side_effect = _new_len + yield + + +def patch_nonscriptable_classes(): + """ + Apply patches on a few nonscriptable detectron2 classes. + Should not have side-effects on eager usage. + """ + # __prepare_scriptable__ can also be added to models for easier maintenance. + # But it complicates the clean model code. + + from detectron2.modeling.backbone import ResNet, FPN + + # Due to https://github.com/pytorch/pytorch/issues/36061, + # we change backbone to use ModuleList for scripting. + # (note: this changes param names in state_dict) + + def prepare_resnet(self): + ret = deepcopy(self) + ret.stages = nn.ModuleList(ret.stages) + for k in self.stage_names: + delattr(ret, k) + return ret + + ResNet.__prepare_scriptable__ = prepare_resnet + + def prepare_fpn(self): + ret = deepcopy(self) + ret.lateral_convs = nn.ModuleList(ret.lateral_convs) + ret.output_convs = nn.ModuleList(ret.output_convs) + for name, _ in self.named_children(): + if name.startswith("fpn_"): + delattr(ret, name) + return ret + + FPN.__prepare_scriptable__ = prepare_fpn + + # Annotate some attributes to be constants for the purpose of scripting, + # even though they are not constants in eager mode. 
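In isolation, the effect of such an annotation looks like this (a toy module, not a detectron2 class): marking the flag as `torch.jit.Final[bool]` lets the scripting compiler treat it as a constant, so the untaken branch can be folded away.

```python
import torch
from torch import nn


class Toy(nn.Module):
    use_extra: torch.jit.Final[bool]       # scripting treats this attribute as a constant

    def __init__(self, use_extra: bool):
        super().__init__()
        self.use_extra = use_extra

    def forward(self, x):
        if self.use_extra:                 # can be constant-folded when use_extra is False
            x = x + 1
        return x * 2


scripted = torch.jit.script(Toy(use_extra=False))
print(scripted(torch.zeros(2)))            # tensor([0., 0.])
print(scripted.code)                       # inspect how the constant flag was compiled
```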
+ from detectron2.modeling.roi_heads import StandardROIHeads + + if hasattr(StandardROIHeads, "__annotations__"): + # copy first to avoid editing annotations of base class + StandardROIHeads.__annotations__ = deepcopy(StandardROIHeads.__annotations__) + StandardROIHeads.__annotations__["mask_on"] = torch.jit.Final[bool] + StandardROIHeads.__annotations__["keypoint_on"] = torch.jit.Final[bool] + + +# These patches are not supposed to have side-effects. +patch_nonscriptable_classes() + + +@contextmanager +def freeze_training_mode(model): + """ + A context manager that annotates the "training" attribute of every submodule + to constant, so that the training codepath in these modules can be + meta-compiled away. Upon exiting, the annotations are reverted. + """ + classes = {type(x) for x in model.modules()} + # __constants__ is the old way to annotate constants and not compatible + # with __annotations__ . + classes = {x for x in classes if not hasattr(x, "__constants__")} + for cls in classes: + cls.__annotations__["training"] = torch.jit.Final[bool] + yield + for cls in classes: + cls.__annotations__["training"] = bool diff --git a/src/sts/detectron2/layers/__init__.py b/src/sts/detectron2/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c8bd1fb024d1cb911dda3f8a77f7ec3ad2e63798 --- /dev/null +++ b/src/sts/detectron2/layers/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm +from .deform_conv import DeformConv, ModulatedDeformConv +from .mask_ops import paste_masks_in_image +from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated +from .roi_align import ROIAlign, roi_align +from .roi_align_rotated import ROIAlignRotated, roi_align_rotated +from .shape_spec import ShapeSpec +from .wrappers import ( + BatchNorm2d, + Conv2d, + ConvTranspose2d, + cat, + interpolate, + Linear, + nonzero_tuple, + cross_entropy, +) +from .blocks import CNNBlockBase, DepthwiseSeparableConv2d +from .aspp import ASPP + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/detectron2/layers/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2da85b1342135d75d74863d66f996eba321aa29 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/aspp.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/aspp.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d95aa6057bee466e7ed403d9c06ae282f58315c5 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/aspp.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/batch_norm.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/batch_norm.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3fca7cea1e8fc292b24441516407b45fbceeed8 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/batch_norm.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/blocks.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/blocks.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2a4acab46daaadcefafa8b4e4fab8fe8f95eb45 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/blocks.cpython-38.pyc differ diff --git 
a/src/sts/detectron2/layers/__pycache__/deform_conv.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/deform_conv.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11e27018c247e562c0b03825987bf6912c2bc546 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/deform_conv.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/mask_ops.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/mask_ops.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c7dcd83bbea3a3cc488d0342b4b1dd9a0c639e0 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/mask_ops.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/nms.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/nms.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..013b344d654cb20bc1b08aefe5a6199cd6d3b067 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/nms.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/roi_align.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/roi_align.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87c84e9765ced60d16e52719f2f047dfc511c575 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/roi_align.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/roi_align_rotated.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/roi_align_rotated.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dd3bf6bc2b57ee83b9d2e3fcb30cc5822d16cc6 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/roi_align_rotated.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/rotated_boxes.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/rotated_boxes.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..740362481d0c7761ce00943eae7d5acbcac3f579 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/rotated_boxes.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/shape_spec.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/shape_spec.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af756707f8bc552a804a4b35d6afcab75ce27078 Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/shape_spec.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/__pycache__/wrappers.cpython-38.pyc b/src/sts/detectron2/layers/__pycache__/wrappers.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5688b636c659ce8c1d3d488b1533c71dc065d85f Binary files /dev/null and b/src/sts/detectron2/layers/__pycache__/wrappers.cpython-38.pyc differ diff --git a/src/sts/detectron2/layers/aspp.py b/src/sts/detectron2/layers/aspp.py new file mode 100644 index 0000000000000000000000000000000000000000..14861aa9ede4fea6a69a49f189bcab997b558148 --- /dev/null +++ b/src/sts/detectron2/layers/aspp.py @@ -0,0 +1,144 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +from copy import deepcopy +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from .batch_norm import get_norm +from .blocks import DepthwiseSeparableConv2d +from .wrappers import Conv2d + + +class ASPP(nn.Module): + """ + Atrous Spatial Pyramid Pooling (ASPP). 
+ """ + + def __init__( + self, + in_channels, + out_channels, + dilations, + *, + norm, + activation, + pool_kernel_size=None, + dropout: float = 0.0, + use_depthwise_separable_conv=False, + ): + """ + Args: + in_channels (int): number of input channels for ASPP. + out_channels (int): number of output channels. + dilations (list): a list of 3 dilations in ASPP. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. norm is + applied to all conv layers except the conv following + global average pooling. + activation (callable): activation function. + pool_kernel_size (tuple, list): the average pooling size (kh, kw) + for image pooling layer in ASPP. If set to None, it always + performs global average pooling. If not None, it must be + divisible by the shape of inputs in forward(). It is recommended + to use a fixed input feature size in training, and set this + option to match this size, so that it performs global average + pooling in training, and the size of the pooling window stays + consistent in inference. + dropout (float): apply dropout on the output of ASPP. It is used in + the official DeepLab implementation with a rate of 0.1: + https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa + use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d + for 3x3 convs in ASPP, proposed in :paper:`DeepLabV3+`. + """ + super(ASPP, self).__init__() + assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) + self.pool_kernel_size = pool_kernel_size + self.dropout = dropout + use_bias = norm == "" + self.convs = nn.ModuleList() + # conv 1x1 + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # atrous convs + for dilation in dilations: + if use_depthwise_separable_conv: + self.convs.append( + DepthwiseSeparableConv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + norm1=norm, + activation1=deepcopy(activation), + norm2=norm, + activation2=deepcopy(activation), + ) + ) + else: + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # image pooling + # We do not add BatchNorm because the spatial resolution is 1x1, + # the original TF implementation has BatchNorm. 
+ if pool_kernel_size is None: + image_pooling = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), + ) + else: + image_pooling = nn.Sequential( + nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), + ) + weight_init.c2_xavier_fill(image_pooling[1]) + self.convs.append(image_pooling) + + self.project = Conv2d( + 5 * out_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=deepcopy(activation), + ) + weight_init.c2_xavier_fill(self.project) + + def forward(self, x): + size = x.shape[-2:] + if self.pool_kernel_size is not None: + if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: + raise ValueError( + "`pool_kernel_size` must be divisible by the shape of inputs. " + "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) + ) + res = [] + for conv in self.convs: + res.append(conv(x)) + res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) + res = torch.cat(res, dim=1) + res = self.project(res) + res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res + return res diff --git a/src/sts/detectron2/layers/batch_norm.py b/src/sts/detectron2/layers/batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..f5382834d2997aa348932430709acb8b35be31b4 --- /dev/null +++ b/src/sts/detectron2/layers/batch_norm.py @@ -0,0 +1,231 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import torch +import torch.distributed as dist +from fvcore.nn.distributed import differentiable_all_reduce +from torch import nn +from torch.nn import functional as F + +from detectron2.utils import comm, env + +from .wrappers import BatchNorm2d + + +class FrozenBatchNorm2d(nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters are fixed. + + It contains non-trainable buffers called + "weight" and "bias", "running_mean", "running_var", + initialized to perform identity transformation. + + The pre-trained backbone models from Caffe2 only contain "weight" and "bias", + which are computed from the original four parameters of BN. + The affine transform `x * weight + bias` will perform the equivalent + computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. + When loading a backbone model from Caffe2, "running_mean" and "running_var" + will be left unchanged as identity transformation. + + Other pre-trained backbone models may contain all 4 parameters. + + The forward is implemented by `F.batch_norm(..., training=False)`. + """ + + _version = 3 + + def __init__(self, num_features, eps=1e-5): + super().__init__() + self.num_features = num_features + self.eps = eps + self.register_buffer("weight", torch.ones(num_features)) + self.register_buffer("bias", torch.zeros(num_features)) + self.register_buffer("running_mean", torch.zeros(num_features)) + self.register_buffer("running_var", torch.ones(num_features) - eps) + + def forward(self, x): + if x.requires_grad: + # When gradients are needed, F.batch_norm will use extra memory + # because its backward op computes gradients for weight/bias as well. 
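+            # The lines below fold the frozen statistics into a single affine
+            # transform,
+            #     out = (x - running_mean) / sqrt(running_var + eps) * weight + bias
+            #         = x * scale + shifted_bias,
+            # so autograd only has to differentiate one multiply-add.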
+ scale = self.weight * (self.running_var + self.eps).rsqrt() + bias = self.bias - self.running_mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + out_dtype = x.dtype # may be half + return x * scale.to(out_dtype) + bias.to(out_dtype) + else: + # When gradients are not needed, F.batch_norm is a single fused op + # and provide more optimization opportunities. + return F.batch_norm( + x, + self.running_mean, + self.running_var, + self.weight, + self.bias, + training=False, + eps=self.eps, + ) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get("version", None) + + if version is None or version < 2: + # No running_mean/var in early versions + # This will silent the warnings + if prefix + "running_mean" not in state_dict: + state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean) + if prefix + "running_var" not in state_dict: + state_dict[prefix + "running_var"] = torch.ones_like(self.running_var) + + # NOTE: if a checkpoint is trained with BatchNorm and loaded (together with + # version number) to FrozenBatchNorm, running_var will be wrong. One solution + # is to remove the version number from the checkpoint. + if version is not None and version < 3: + logger = logging.getLogger(__name__) + logger.info("FrozenBatchNorm {} is upgraded to version 3.".format(prefix.rstrip("."))) + # In version < 3, running_var are used without +eps. + state_dict[prefix + "running_var"] -= self.eps + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) + + def __repr__(self): + return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps) + + @classmethod + def convert_frozen_batchnorm(cls, module): + """ + Convert all BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. + + Args: + module (torch.nn.Module): + + Returns: + If module is BatchNorm/SyncBatchNorm, returns a new module. + Otherwise, in-place convert module and return it. + + Similar to convert_sync_batchnorm in + https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py + """ + bn_module = nn.modules.batchnorm + bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm) + res = module + if isinstance(module, bn_module): + res = cls(module.num_features) + if module.affine: + res.weight.data = module.weight.data.clone().detach() + res.bias.data = module.bias.data.clone().detach() + res.running_mean.data = module.running_mean.data + res.running_var.data = module.running_var.data + res.eps = module.eps + else: + for name, child in module.named_children(): + new_child = cls.convert_frozen_batchnorm(child) + if new_child is not child: + res.add_module(name, new_child) + return res + + +def get_norm(norm, out_channels): + """ + Args: + norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; + or a callable that takes a channel number and returns + the normalization layer as a nn.Module. 
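+
+    Example (illustrative)::
+
+        get_norm("GN", 256)  # -> nn.GroupNorm(32, 256)
+        get_norm("", 256)    # -> None (no normalization)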
+ + Returns: + nn.Module or None: the normalization layer + """ + if norm is None: + return None + if isinstance(norm, str): + if len(norm) == 0: + return None + norm = { + "BN": BatchNorm2d, + # Fixed in https://github.com/pytorch/pytorch/pull/36382 + "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, + "FrozenBN": FrozenBatchNorm2d, + "GN": lambda channels: nn.GroupNorm(32, channels), + # for debugging: + "nnSyncBN": nn.SyncBatchNorm, + "naiveSyncBN": NaiveSyncBatchNorm, + }[norm] + return norm(out_channels) + + +class NaiveSyncBatchNorm(BatchNorm2d): + """ + In PyTorch<=1.5, ``nn.SyncBatchNorm`` has incorrect gradient + when the batch size on each worker is different. + (e.g., when scale augmentation is used, or when it is applied to mask head). + + This is a slower but correct alternative to `nn.SyncBatchNorm`. + + Note: + There isn't a single definition of Sync BatchNorm. + + When ``stats_mode==""``, this module computes overall statistics by using + statistics of each worker with equal weight. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (N, H, W). This mode does not support inputs with zero batch size. + + When ``stats_mode=="N"``, this module computes overall statistics by weighting + the statistics of each worker by their ``N``. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (H, W). It is slower than ``stats_mode==""``. + + Even though the result of this module may not be the true statistics of all samples, + it may still be reasonable because it might be preferrable to assign equal weights + to all workers, regardless of their (H, W) dimension, instead of putting larger weight + on larger images. From preliminary experiments, little difference is found between such + a simplified implementation and an accurate computation of overall mean & variance. + """ + + def __init__(self, *args, stats_mode="", **kwargs): + super().__init__(*args, **kwargs) + assert stats_mode in ["", "N"] + self._stats_mode = stats_mode + + def forward(self, input): + if comm.get_world_size() == 1 or not self.training: + return super().forward(input) + + B, C = input.shape[0], input.shape[1] + + mean = torch.mean(input, dim=[0, 2, 3]) + meansqr = torch.mean(input * input, dim=[0, 2, 3]) + + if self._stats_mode == "": + assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.' 
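+            # Equal-weight mode: per-channel mean and mean-of-squares are
+            # concatenated, summed across workers via differentiable_all_reduce,
+            # then divided by the world size, so every worker contributes equally
+            # regardless of its local batch size.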
+ vec = torch.cat([mean, meansqr], dim=0) + vec = differentiable_all_reduce(vec) * (1.0 / dist.get_world_size()) + mean, meansqr = torch.split(vec, C) + momentum = self.momentum + else: + if B == 0: + vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype) + vec = vec + input.sum() # make sure there is gradient w.r.t input + else: + vec = torch.cat( + [mean, meansqr, torch.ones([1], device=mean.device, dtype=mean.dtype)], dim=0 + ) + vec = differentiable_all_reduce(vec * B) + + total_batch = vec[-1].detach() + momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0 + total_batch = torch.max(total_batch, torch.ones_like(total_batch)) # avoid div-by-zero + mean, meansqr, _ = torch.split(vec / total_batch, C) + + var = meansqr - mean * mean + invstd = torch.rsqrt(var + self.eps) + scale = self.weight * invstd + bias = self.bias - mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + + self.running_mean += momentum * (mean.detach() - self.running_mean) + self.running_var += momentum * (var.detach() - self.running_var) + return input * scale + bias diff --git a/src/sts/detectron2/layers/blocks.py b/src/sts/detectron2/layers/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..1995a4bf7339e8deb7eaaffda4f819dda55e7ac7 --- /dev/null +++ b/src/sts/detectron2/layers/blocks.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import fvcore.nn.weight_init as weight_init +from torch import nn + +from .batch_norm import FrozenBatchNorm2d, get_norm +from .wrappers import Conv2d + + +""" +CNN building blocks. +""" + + +class CNNBlockBase(nn.Module): + """ + A CNN block is assumed to have input channels, output channels and a stride. + The input and output of `forward()` method must be NCHW tensors. + The method can perform arbitrary computation but must match the given + channels and stride specification. + + Attribute: + in_channels (int): + out_channels (int): + stride (int): + """ + + def __init__(self, in_channels, out_channels, stride): + """ + The `__init__` method of any subclass should also contain these arguments. + + Args: + in_channels (int): + out_channels (int): + stride (int): + """ + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + + def freeze(self): + """ + Make this block not trainable. + This method sets all parameters to `requires_grad=False`, + and convert all BatchNorm layers to FrozenBatchNorm + + Returns: + the block itself + """ + for p in self.parameters(): + p.requires_grad = False + FrozenBatchNorm2d.convert_frozen_batchnorm(self) + return self + + +class DepthwiseSeparableConv2d(nn.Module): + """ + A kxk depthwise convolution + a 1x1 convolution. + + In :paper:`xception`, norm & activation are applied on the second conv. + :paper:`mobilenet` uses norm & activation on both convs. + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + padding=1, + dilation=1, + *, + norm1=None, + activation1=None, + norm2=None, + activation2=None, + ): + """ + Args: + norm1, norm2 (str or callable): normalization for the two conv layers. + activation1, activation2 (callable(Tensor) -> Tensor): activation + function for the two conv layers. 
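+
+        Example (illustrative)::
+
+            conv = DepthwiseSeparableConv2d(64, 128, norm1="BN", activation1=nn.ReLU(),
+                                            norm2="BN", activation2=nn.ReLU())
+            y = conv(x)  # x: (N, 64, H, W) -> y: (N, 128, H, W)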
+ """ + super().__init__() + self.depthwise = Conv2d( + in_channels, + in_channels, + kernel_size=kernel_size, + padding=padding, + dilation=dilation, + groups=in_channels, + bias=not norm1, + norm=get_norm(norm1, in_channels), + activation=activation1, + ) + self.pointwise = Conv2d( + in_channels, + out_channels, + kernel_size=1, + bias=not norm2, + norm=get_norm(norm2, out_channels), + activation=activation2, + ) + + # default initialization + weight_init.c2_msra_fill(self.depthwise) + weight_init.c2_msra_fill(self.pointwise) + + def forward(self, x): + return self.pointwise(self.depthwise(x)) diff --git a/src/sts/detectron2/layers/csrc/README.md b/src/sts/detectron2/layers/csrc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..778ed3da0bae89820831bcd8a72ff7b9cad8d4dd --- /dev/null +++ b/src/sts/detectron2/layers/csrc/README.md @@ -0,0 +1,7 @@ + + +To add a new Op: + +1. Create a new directory +2. Implement new ops there +3. Delcare its Python interface in `vision.cpp`. diff --git a/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h new file mode 100644 index 0000000000000000000000000000000000000000..7ceb5185e0305de554644dc082866e41416520b8 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h @@ -0,0 +1,115 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once +#include + +namespace detectron2 { + +at::Tensor ROIAlignRotated_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + +at::Tensor ROIAlignRotated_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor ROIAlignRotated_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + +at::Tensor ROIAlignRotated_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio); +#endif + +// Interface for Python +inline at::Tensor ROIAlignRotated_forward( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlignRotated_forward_cuda( + input, + rois, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + return ROIAlignRotated_forward_cpu( + input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); +} + +inline at::Tensor ROIAlignRotated_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + if (grad.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlignRotated_backward_cuda( + grad, 
+ rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + return ROIAlignRotated_backward_cpu( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio); +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2a3d3056cc71a4acaafb570739a9dd247a7eb1ed --- /dev/null +++ b/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp @@ -0,0 +1,522 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include "ROIAlignRotated.h" + +// Note: this implementation originates from the Caffe2 ROIAlignRotated Op +// and PyTorch ROIAlign (non-rotated) Op implementations. +// The key difference between this implementation and those ones is +// we don't do "legacy offset" in this version, as there aren't many previous +// works, if any, using the "legacy" ROIAlignRotated Op. +// This would make the interface a bit cleaner. + +namespace detectron2 { + +namespace { +template +struct PreCalc { + int pos1; + int pos2; + int pos3; + int pos4; + T w1; + T w2; + T w3; + T w4; +}; + +template +void pre_calc_for_bilinear_interpolate( + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int iy_upper, + const int ix_upper, + T roi_start_h, + T roi_start_w, + T bin_size_h, + T bin_size_w, + int roi_bin_grid_h, + int roi_bin_grid_w, + T roi_center_h, + T roi_center_w, + T cos_theta, + T sin_theta, + std::vector>& pre_calc) { + int pre_calc_index = 0; + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + for (int iy = 0; iy < iy_upper; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < ix_upper; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + // In image space, (y, x) is the order for Right Handed System, + // and this is essentially multiplying the point by a rotation matrix + // to rotate it counterclockwise through angle theta. + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + // deal with: inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + PreCalc pc; + pc.pos1 = 0; + pc.pos2 = 0; + pc.pos3 = 0; + pc.pos4 = 0; + pc.w1 = 0; + pc.w2 = 0; + pc.w3 = 0; + pc.w4 = 0; + pre_calc[pre_calc_index] = pc; + pre_calc_index += 1; + continue; + } + + if (y < 0) { + y = 0; + } + if (x < 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + // save weights and indices + PreCalc pc; + pc.pos1 = y_low * width + x_low; + pc.pos2 = y_low * width + x_high; + pc.pos3 = y_high * width + x_low; + pc.pos4 = y_high * width + x_high; + pc.w1 = w1; + pc.w2 = w2; + pc.w3 = w3; + pc.w4 = w4; + pre_calc[pre_calc_index] = pc; + + pre_calc_index += 1; + } + } + } + } +} + +template +void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +inline void add(T* address, const T& val) { + *address += val; +} + +} // namespace + +template +void ROIAlignRotatedForward( + const int nthreads, + const T* input, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* rois, + T* output) { + int n_rois = nthreads / channels / pooled_width / pooled_height; + // (n, c, ph, pw) is an element in the pooled output + // can be parallelized using omp + // #pragma omp parallel for num_threads(32) + for (int n = 0; n < n_rois; n++) { + int index_n = n * channels * pooled_width * pooled_height; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlignRotated do not have non-negative size!"); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 + + // we want to precalculate indices and weights shared by all channels, + // this is the key point of optimization + std::vector> pre_calc( + roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + pre_calc_for_bilinear_interpolate( + height, + width, + pooled_height, + pooled_width, + roi_bin_grid_h, + roi_bin_grid_w, + roi_start_h, + roi_start_w, + bin_size_h, + bin_size_w, + roi_bin_grid_h, + roi_bin_grid_w, + roi_center_h, + roi_center_w, + cos_theta, + sin_theta, + pre_calc); + + for (int c = 0; c < channels; c++) { + int index_n_c = index_n + c * pooled_width * pooled_height; + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + int pre_calc_index = 0; + + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + int index = index_n_c + ph * pooled_width + pw; + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + PreCalc pc = pre_calc[pre_calc_index]; + output_val += pc.w1 * offset_input[pc.pos1] + + pc.w2 * offset_input[pc.pos2] + + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; + + pre_calc_index += 1; + } + } + output_val /= count; + + output[index] = output_val; + } // for pw + } // for ph + } // for c + } // for n +} + +template +void ROIAlignRotatedBackward( + const int nthreads, + // may not be contiguous. should index using n_stride, etc + const T* grad_output, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* grad_input, + const T* rois, + const int n_stride, + const int c_stride, + const int h_stride, + const int w_stride) { + for (int index = 0; index < nthreads; index++) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlignRotated do not have non-negative size!"); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_grad_input = + grad_input + ((roi_batch_ind * channels + c) * height * width); + + int output_offset = n * n_stride + c * c_stride; + const T* offset_grad_output = grad_output + output_offset; + const T grad_output_this_bin = + offset_grad_output[ph * h_stride + pw * w_stride]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? 
sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + + T g1 = grad_output_this_bin * w1 / count; + T g2 = grad_output_this_bin * w2 / count; + T g3 = grad_output_this_bin * w3 / count; + T g4 = grad_output_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + // atomic add is not needed for now since it is single threaded + add(offset_grad_input + y_low * width + x_low, static_cast(g1)); + add(offset_grad_input + y_low * width + x_high, static_cast(g2)); + add(offset_grad_input + y_high * width + x_low, static_cast(g3)); + add(offset_grad_input + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // for +} // ROIAlignRotatedBackward + +at::Tensor ROIAlignRotated_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlign_forward_cpu"; + at::checkAllSameType(c, {input_t, rois_t}); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + at::Tensor output = at::zeros( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + + auto output_size = num_rois * pooled_height * pooled_width * channels; + + if (output.numel() == 0) { + return output; + } + + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "ROIAlignRotated_forward", [&] { + ROIAlignRotatedForward( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr()); + }); + return output; +} + +at::Tensor ROIAlignRotated_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlignRotated_backward_cpu"; + at::checkAllSameType(c, {grad_t, rois_t}); + + at::Tensor grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + // handle possibly empty gradients + if (grad.numel() == 0) { + return grad_input; + } + + // get stride values to ensure indexing into gradients is correct. + int n_stride = grad.stride(0); + int c_stride = grad.stride(1); + int h_stride = grad.stride(2); + int w_stride = grad.stride(3); + + auto rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad.scalar_type(), "ROIAlignRotated_forward", [&] { + ROIAlignRotatedBackward( + grad.numel(), + grad.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr(), + n_stride, + c_stride, + h_stride, + w_stride); + }); + return grad_input; +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..fca186519143b168a912c880a4cf495a0a5a9322 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu @@ -0,0 +1,443 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include +#include +#include + +// TODO make it in a common file +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +// Note: this implementation originates from the Caffe2 ROIAlignRotated Op +// and PyTorch ROIAlign (non-rotated) Op implementations. +// The key difference between this implementation and those ones is +// we don't do "legacy offset" in this version, as there aren't many previous +// works, if any, using the "legacy" ROIAlignRotated Op. +// This would make the interface a bit cleaner. + +namespace detectron2 { + +namespace { + +template +__device__ T bilinear_interpolate( + const T* input, + const int height, + const int width, + T y, + T x) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + return 0; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + // do bilinear interpolation + T v1 = input[y_low * width + x_low]; + T v2 = input[y_low * width + x_high]; + T v3 = input[y_high * width + x_low]; + T v4 = input[y_high * width + x_high]; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +template +__device__ void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +} // namespace + +template +__global__ void RoIAlignRotatedForward( + const int nthreads, + const T* input, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* rois, + T* top_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (inte gral) pooling inside a bin + const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T val = bilinear_interpolate(offset_input, height, width, y, x); + output_val += val; + } + } + output_val /= count; + + top_data[index] = output_val; + } +} + +template +__global__ void RoIAlignRotatedBackwardFeature( + const int nthreads, + const T* top_diff, + const int num_rois, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* bottom_diff, + const T* rois) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_bottom_diff = + bottom_diff + (roi_batch_ind * channels + c) * height * width; + + int top_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_top_diff = top_diff + top_offset; + const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + + T g1 = top_diff_this_bin * w1 / count; + T g2 = top_diff_this_bin * w2 / count; + T g3 = top_diff_this_bin * w3 / count; + T g4 = top_diff_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd( + offset_bottom_diff + y_low * width + x_low, static_cast(g1)); + atomicAdd( + offset_bottom_diff + y_low * width + x_high, static_cast(g2)); + atomicAdd( + offset_bottom_diff + y_high * width + x_low, static_cast(g3)); + atomicAdd( + offset_bottom_diff + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // CUDA_1D_KERNEL_LOOP +} // RoIAlignRotatedBackward + +at::Tensor ROIAlignRotated_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlignRotated_forward_cuda"; + at::checkAllSameGPU(c, {input_t, rois_t}); + at::checkAllSameType(c, {input_t, rois_t}); + at::cuda::CUDAGuard device_guard(input.device()); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + auto output = at::empty( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + auto output_size = num_rois * pooled_height * pooled_width * channels; + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( + static_cast(output_size), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + if (output.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return output; + } + + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES( + input.scalar_type(), "ROIAlignRotated_forward", [&] { + RoIAlignRotatedForward<<>>( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr()); + }); + cudaDeviceSynchronize(); + AT_CUDA_CHECK(cudaGetLastError()); + return output; +} + +// TODO remove the dependency on input and use instead its sizes -> save memory +at::Tensor ROIAlignRotated_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; + at::CheckedFrom c = "ROIAlign_backward_cuda"; + at::checkAllSameGPU(c, {grad_t, rois_t}); + at::checkAllSameType(c, {grad_t, rois_t}); + at::cuda::CUDAGuard device_guard(grad.device()); + + auto num_rois = rois.size(0); + auto grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( + static_cast(grad.numel()), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + // handle possibly empty gradients + if (grad.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; + } + + auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES( + grad.scalar_type(), "ROIAlignRotated_backward", [&] { + RoIAlignRotatedBackwardFeature<<>>( + grad.numel(), + grad_.data_ptr(), + num_rois, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr()); + }); + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h new file mode 100644 index 0000000000000000000000000000000000000000..b65888b1be11881a776827b5212f08b8f63138f9 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h @@ -0,0 +1,35 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once +#include + +namespace detectron2 { + +at::Tensor box_iou_rotated_cpu( + const at::Tensor& boxes1, + const at::Tensor& boxes2); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor box_iou_rotated_cuda( + const at::Tensor& boxes1, + const at::Tensor& boxes2); +#endif + +// Interface for Python +// inline is needed to prevent multiple function definitions when this header is +// included by different cpps +inline at::Tensor box_iou_rotated( + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); + if (boxes1.device().is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + + return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c843487b5fa4e8077dd27402ec99009266ddda8d --- /dev/null +++ b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
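+// CPU reference path for pairwise IoU of rotated boxes: the kernel below runs a
+// straightforward O(N1 * N2) double loop, calling single_box_iou_rotated (from
+// box_iou_rotated_utils.h) for every pair, then reshapes the flat result into an
+// (N1, N2) matrix.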
+#include "box_iou_rotated.h" +#include "box_iou_rotated_utils.h" + +namespace detectron2 { + +template +void box_iou_rotated_cpu_kernel( + const at::Tensor& boxes1, + const at::Tensor& boxes2, + at::Tensor& ious) { + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + + for (int i = 0; i < num_boxes1; i++) { + for (int j = 0; j < num_boxes2; j++) { + ious[i * num_boxes2 + j] = single_box_iou_rotated( + boxes1[i].data_ptr(), boxes2[j].data_ptr()); + } + } +} + +at::Tensor box_iou_rotated_cpu( + // input must be contiguous: + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + at::Tensor ious = + at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); + + box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); + + // reshape from 1d array to 2d array + auto shape = std::vector{num_boxes1, num_boxes2}; + return ious.reshape(shape); +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..952710e53041187907fbd113f8d0d0fa24134a86 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu @@ -0,0 +1,130 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include +#include +#include +#include "box_iou_rotated_utils.h" + +namespace detectron2 { + +// 2D block with 32 * 16 = 512 threads per block +const int BLOCK_DIM_X = 32; +const int BLOCK_DIM_Y = 16; + +template +__global__ void box_iou_rotated_cuda_kernel( + const int n_boxes1, + const int n_boxes2, + const T* dev_boxes1, + const T* dev_boxes2, + T* dev_ious) { + const int row_start = blockIdx.x * blockDim.x; + const int col_start = blockIdx.y * blockDim.y; + + const int row_size = min(n_boxes1 - row_start, blockDim.x); + const int col_size = min(n_boxes2 - col_start, blockDim.y); + + __shared__ float block_boxes1[BLOCK_DIM_X * 5]; + __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; + + // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y + if (threadIdx.x < row_size && threadIdx.y == 0) { + block_boxes1[threadIdx.x * 5 + 0] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; + block_boxes1[threadIdx.x * 5 + 1] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; + block_boxes1[threadIdx.x * 5 + 2] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; + block_boxes1[threadIdx.x * 5 + 3] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; + block_boxes1[threadIdx.x * 5 + 4] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; + } + + if (threadIdx.x < col_size && threadIdx.y == 0) { + block_boxes2[threadIdx.x * 5 + 0] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; + block_boxes2[threadIdx.x * 5 + 1] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; + block_boxes2[threadIdx.x * 5 + 2] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; + block_boxes2[threadIdx.x * 5 + 3] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; + block_boxes2[threadIdx.x * 5 + 4] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size && threadIdx.y < col_size) { + int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; + dev_ious[offset] = single_box_iou_rotated( + block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); + } +} + +at::Tensor box_iou_rotated_cuda( + // input must be contiguous + const at::Tensor& boxes1, + const 
at::Tensor& boxes2) { + using scalar_t = float; + AT_ASSERTM( + boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); + AT_ASSERTM( + boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); + AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); + AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(boxes1.device()); + + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + + at::Tensor ious = + at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); + + bool transpose = false; + if (num_boxes1 > 0 && num_boxes2 > 0) { + scalar_t *data1 = boxes1.data_ptr(), + *data2 = boxes2.data_ptr(); + + if (num_boxes2 > 65535 * BLOCK_DIM_Y) { + AT_ASSERTM( + num_boxes1 <= 65535 * BLOCK_DIM_Y, + "Too many boxes for box_iou_rotated_cuda!"); + // x dim is allowed to be large, but y dim cannot, + // so we transpose the two to avoid "invalid configuration argument" + // error. We assume one of them is small. Otherwise the result is hard to + // fit in memory anyway. + std::swap(num_boxes1, num_boxes2); + std::swap(data1, data2); + transpose = true; + } + + const int blocks_x = + at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); + const int blocks_y = + at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); + + dim3 blocks(blocks_x, blocks_y); + dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + box_iou_rotated_cuda_kernel<<>>( + num_boxes1, + num_boxes2, + data1, + data2, + (scalar_t*)ious.data_ptr()); + + AT_CUDA_CHECK(cudaGetLastError()); + } + + // reshape from 1d array to 2d array + auto shape = std::vector{num_boxes1, num_boxes2}; + if (transpose) { + return ious.view(shape).t(); + } else { + return ious.view(shape); + } +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..b54a5dde2ca11a74d29c4d8adb7fe1634f5baf9c --- /dev/null +++ b/src/sts/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h @@ -0,0 +1,370 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once + +#include +#include + +#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 +// Designates functions callable from the host (CPU) and the device (GPU) +#define HOST_DEVICE __host__ __device__ +#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ +#else +#include +#define HOST_DEVICE +#define HOST_DEVICE_INLINE HOST_DEVICE inline +#endif + +namespace detectron2 { + +namespace { + +template +struct RotatedBox { + T x_ctr, y_ctr, w, h, a; +}; + +template +struct Point { + T x, y; + HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {} + HOST_DEVICE_INLINE Point operator+(const Point& p) const { + return Point(x + p.x, y + p.y); + } + HOST_DEVICE_INLINE Point& operator+=(const Point& p) { + x += p.x; + y += p.y; + return *this; + } + HOST_DEVICE_INLINE Point operator-(const Point& p) const { + return Point(x - p.x, y - p.y); + } + HOST_DEVICE_INLINE Point operator*(const T coeff) const { + return Point(x * coeff, y * coeff); + } +}; + +template +HOST_DEVICE_INLINE T dot_2d(const Point& A, const Point& B) { + return A.x * B.x + A.y * B.y; +} + +// R: result type. 
can be different from input type +template +HOST_DEVICE_INLINE R cross_2d(const Point& A, const Point& B) { + return static_cast(A.x) * static_cast(B.y) - + static_cast(B.x) * static_cast(A.y); +} + +template +HOST_DEVICE_INLINE void get_rotated_vertices( + const RotatedBox& box, + Point (&pts)[4]) { + // M_PI / 180. == 0.01745329251 + double theta = box.a * 0.01745329251; + T cosTheta2 = (T)cos(theta) * 0.5f; + T sinTheta2 = (T)sin(theta) * 0.5f; + + // y: top --> down; x: left --> right + pts[0].x = box.x_ctr + sinTheta2 * box.h + cosTheta2 * box.w; + pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; + pts[1].x = box.x_ctr - sinTheta2 * box.h + cosTheta2 * box.w; + pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; + pts[2].x = 2 * box.x_ctr - pts[0].x; + pts[2].y = 2 * box.y_ctr - pts[0].y; + pts[3].x = 2 * box.x_ctr - pts[1].x; + pts[3].y = 2 * box.y_ctr - pts[1].y; +} + +template +HOST_DEVICE_INLINE int get_intersection_points( + const Point (&pts1)[4], + const Point (&pts2)[4], + Point (&intersections)[24]) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + Point vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i] = pts1[(i + 1) % 4] - pts1[i]; + vec2[i] = pts2[(i + 1) % 4] - pts2[i]; + } + + // When computing the intersection area, it doesn't hurt if we have + // more (duplicated/approximate) intersections/vertices than needed, + // while it can cause drastic difference if we miss an intersection/vertex. + // Therefore, we add an epsilon to relax the comparisons between + // the float point numbers that decide the intersection points. + double EPS = 1e-5; + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + T det = cross_2d(vec2[j], vec1[i]); + + // This takes care of parallel lines + if (fabs(det) <= 1e-14) { + continue; + } + + auto vec12 = pts2[j] - pts1[i]; + + T t1 = cross_2d(vec2[j], vec12) / det; + T t2 = cross_2d(vec1[i], vec12) / det; + + if (t1 > -EPS && t1 < 1.0f + EPS && t2 > -EPS && t2 < 1.0f + EPS) { + intersections[num++] = pts1[i] + vec1[i] * t1; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const auto& AB = vec2[0]; + const auto& DA = vec2[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. 
P's projection on AB lies within AB + // and P's projection on AD lies within AD + + auto AP = pts1[i] - pts2[0]; + + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB > -EPS) && (APdotAD > -EPS) && (APdotAB < ABdotAB + EPS) && + (APdotAD < ADdotAD + EPS)) { + intersections[num++] = pts1[i]; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const auto& AB = vec1[0]; + const auto& DA = vec1[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (int i = 0; i < 4; i++) { + auto AP = pts2[i] - pts1[0]; + + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB > -EPS) && (APdotAD > -EPS) && (APdotAB < ABdotAB + EPS) && + (APdotAD < ADdotAD + EPS)) { + intersections[num++] = pts2[i]; + } + } + } + + return num; +} + +template +HOST_DEVICE_INLINE int convex_hull_graham( + const Point (&p)[24], + const int& num_in, + Point (&q)[24], + bool shift_to_zero = false) { + assert(num_in >= 2); + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. + int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + auto& start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i] = p[i] - start; + } + + // Swap the starting point to position 0 + auto tmp = q[0]; + q[0] = q[t]; + q[t] = tmp; + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + T dist[24]; +#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 + // compute distance to origin before sort, and sort them together with the + // points + for (int i = 0; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } + + // CUDA version + // In the future, we can potentially use thrust + // for sorting here to improve speed (though not guaranteed) + for (int i = 1; i < num_in - 1; i++) { + for (int j = i + 1; j < num_in; j++) { + T crossProduct = cross_2d(q[i], q[j]); + if ((crossProduct < -1e-6) || + (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { + auto q_tmp = q[i]; + q[i] = q[j]; + q[j] = q_tmp; + auto dist_tmp = dist[i]; + dist[i] = dist[j]; + dist[j] = dist_tmp; + } + } + } +#else + // CPU version + std::sort( + q + 1, q + num_in, [](const Point& A, const Point& B) -> bool { + T temp = cross_2d(A, B); + if (fabs(temp) < 1e-6) { + return dot_2d(A, A) < dot_2d(B, B); + } else { + return temp > 0; + } + }); + // compute distance to origin after sort, since the points are now different. + for (int i = 0; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } +#endif + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0] = p[t]; + return 1; + } + q[1] = q[k]; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. 
+ // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + while (m > 1) { + auto q1 = q[i] - q[m - 2], q2 = q[m - 1] - q[m - 2]; + // cross_2d() uses FMA and therefore computes round(round(q1.x*q2.y) - + // q2.x*q1.y) So it may not return 0 even when q1==q2. Therefore we + // compare round(q1.x*q2.y) and round(q2.x*q1.y) directly. (round means + // round to nearest floating point). + if (q1.x * q2.y >= q2.x * q1.y) + m--; + else + break; + } + // Using double also helps, but float can solve the issue for now. + // while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) + // >= 0) { + // m--; + // } + q[m++] = q[i]; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. + if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i] += start; + } + } + + return m; +} + +template +HOST_DEVICE_INLINE T polygon_area(const Point (&q)[24], const int& m) { + if (m <= 2) { + return 0; + } + + T area = 0; + for (int i = 1; i < m - 1; i++) { + area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); + } + + return area / 2.0; +} + +template +HOST_DEVICE_INLINE T rotated_boxes_intersection( + const RotatedBox& box1, + const RotatedBox& box2) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from rotated_rect_intersection_pts + Point intersectPts[24], orderedPts[24]; + + Point pts1[4]; + Point pts2[4]; + get_rotated_vertices(box1, pts1); + get_rotated_vertices(box2, pts2); + + int num = get_intersection_points(pts1, pts2, intersectPts); + + if (num <= 2) { + return 0.0; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. + int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); + return polygon_area(orderedPts, num_convex); +} + +} // namespace + +template +HOST_DEVICE_INLINE T +single_box_iou_rotated(T const* const box1_raw, T const* const box2_raw) { + // shift center to the middle point to achieve higher precision in result + RotatedBox box1, box2; + auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0; + auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0; + box1.x_ctr = box1_raw[0] - center_shift_x; + box1.y_ctr = box1_raw[1] - center_shift_y; + box1.w = box1_raw[2]; + box1.h = box1_raw[3]; + box1.a = box1_raw[4]; + box2.x_ctr = box2_raw[0] - center_shift_x; + box2.y_ctr = box2_raw[1] - center_shift_y; + box2.w = box2_raw[2]; + box2.h = box2_raw[3]; + box2.a = box2_raw[4]; + + T area1 = box1.w * box1.h; + T area2 = box2.w * box2.h; + if (area1 < 1e-14 || area2 < 1e-14) { + return 0.f; + } + + T intersection = rotated_boxes_intersection(box1, box2); + T iou = intersection / (area1 + area2 - intersection); + return iou; +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.cpp b/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0a5b7b907c06720fefc77b0dfd921b8ec3ecf2be --- /dev/null +++ b/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.cpp @@ -0,0 +1,507 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
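+// Hedged overview: this file ports the hot loops of the COCO PythonAPI's
+// COCOeval.evaluateImg() and COCOeval.accumulate() to C++. EvaluateImages()
+// matches detections to ground truth per (image, category, area range)
+// combination, and Accumulate() turns those matches into precision/recall/
+// score tables. A minimal sketch of how such functions are commonly exposed
+// through pybind11 follows; the module and binding names are illustrative
+// assumptions, not taken from this diff, and pybind11/stl.h would be needed
+// for the std::vector conversions:
+//
+//   PYBIND11_MODULE(_C, m) {
+//     m.def("COCOevalEvaluateImages", &detectron2::COCOeval::EvaluateImages);
+//     m.def("COCOevalAccumulate", &detectron2::COCOeval::Accumulate);
+//   }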
+#include "cocoeval.h" +#include +#include +#include +#include + +using namespace pybind11::literals; + +namespace detectron2 { + +namespace COCOeval { + +// Sort detections from highest score to lowest, such that +// detection_instances[detection_sorted_indices[t]] >= +// detection_instances[detection_sorted_indices[t+1]]. Use stable_sort to match +// original COCO API +void SortInstancesByDetectionScore( + const std::vector& detection_instances, + std::vector* detection_sorted_indices) { + detection_sorted_indices->resize(detection_instances.size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_instances](size_t j1, size_t j2) { + return detection_instances[j1].score > detection_instances[j2].score; + }); +} + +// Partition the ground truth objects based on whether or not to ignore them +// based on area +void SortInstancesByIgnore( + const std::array& area_range, + const std::vector& ground_truth_instances, + std::vector* ground_truth_sorted_indices, + std::vector* ignores) { + ignores->clear(); + ignores->reserve(ground_truth_instances.size()); + for (auto o : ground_truth_instances) { + ignores->push_back( + o.ignore || o.area < area_range[0] || o.area > area_range[1]); + } + + ground_truth_sorted_indices->resize(ground_truth_instances.size()); + std::iota( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + 0); + std::stable_sort( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + [&ignores](size_t j1, size_t j2) { + return (int)(*ignores)[j1] < (int)(*ignores)[j2]; + }); +} + +// For each IOU threshold, greedily match each detected instance to a ground +// truth instance (if possible) and store the results +void MatchDetectionsToGroundTruth( + const std::vector& detection_instances, + const std::vector& detection_sorted_indices, + const std::vector& ground_truth_instances, + const std::vector& ground_truth_sorted_indices, + const std::vector& ignores, + const std::vector>& ious, + const std::vector& iou_thresholds, + const std::array& area_range, + ImageEvaluation* results) { + // Initialize memory to store return data matches and ignore + const int num_iou_thresholds = iou_thresholds.size(); + const int num_ground_truth = ground_truth_sorted_indices.size(); + const int num_detections = detection_sorted_indices.size(); + std::vector ground_truth_matches( + num_iou_thresholds * num_ground_truth, 0); + std::vector& detection_matches = results->detection_matches; + std::vector& detection_ignores = results->detection_ignores; + std::vector& ground_truth_ignores = results->ground_truth_ignores; + detection_matches.resize(num_iou_thresholds * num_detections, 0); + detection_ignores.resize(num_iou_thresholds * num_detections, false); + ground_truth_ignores.resize(num_ground_truth); + for (auto g = 0; g < num_ground_truth; ++g) { + ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + for (auto d = 0; d < num_detections; ++d) { + // information about best match so far (match=-1 -> unmatched) + double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); + int match = -1; + for (auto g = 0; g < num_ground_truth; ++g) { + // if this ground truth instance is already matched and not a + // crowd, it cannot be matched to another detection + if (ground_truth_matches[t * num_ground_truth + g] > 0 && + 
!ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { + continue; + } + + // if detected instance matched to a regular ground truth + // instance, we can break on the first ground truth instance + // tagged as ignore (because they are sorted by the ignore tag) + if (match >= 0 && !ground_truth_ignores[match] && + ground_truth_ignores[g]) { + break; + } + + // if IOU overlap is the best so far, store the match appropriately + if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { + best_iou = ious[d][ground_truth_sorted_indices[g]]; + match = g; + } + } + // if match was made, store id of match for both detection and + // ground truth + if (match >= 0) { + detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; + detection_matches[t * num_detections + d] = + ground_truth_instances[ground_truth_sorted_indices[match]].id; + ground_truth_matches[t * num_ground_truth + match] = + detection_instances[detection_sorted_indices[d]].id; + } + + // set unmatched detections outside of area range to ignore + const InstanceAnnotation& detection = + detection_instances[detection_sorted_indices[d]]; + detection_ignores[t * num_detections + d] = + detection_ignores[t * num_detections + d] || + (detection_matches[t * num_detections + d] == 0 && + (detection.area < area_range[0] || detection.area > area_range[1])); + } + } + + // store detection score results + results->detection_scores.resize(detection_sorted_indices.size()); + for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { + results->detection_scores[d] = + detection_instances[detection_sorted_indices[d]].score; + } +} + +std::vector EvaluateImages( + const std::vector>& area_ranges, + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances) { + const int num_area_ranges = area_ranges.size(); + const int num_images = image_category_ground_truth_instances.size(); + const int num_categories = + image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; + std::vector detection_sorted_indices; + std::vector ground_truth_sorted_indices; + std::vector ignores; + std::vector results_all( + num_images * num_area_ranges * num_categories); + + // Store results for each image, category, and area range combination. 
Results + // for each IOU threshold are packed into the same ImageEvaluation object + for (auto i = 0; i < num_images; ++i) { + for (auto c = 0; c < num_categories; ++c) { + const std::vector& ground_truth_instances = + image_category_ground_truth_instances[i][c]; + const std::vector& detection_instances = + image_category_detection_instances[i][c]; + + SortInstancesByDetectionScore( + detection_instances, &detection_sorted_indices); + if ((int)detection_sorted_indices.size() > max_detections) { + detection_sorted_indices.resize(max_detections); + } + + for (size_t a = 0; a < area_ranges.size(); ++a) { + SortInstancesByIgnore( + area_ranges[a], + ground_truth_instances, + &ground_truth_sorted_indices, + &ignores); + + MatchDetectionsToGroundTruth( + detection_instances, + detection_sorted_indices, + ground_truth_instances, + ground_truth_sorted_indices, + ignores, + image_category_ious[i][c], + iou_thresholds, + area_ranges[a], + &results_all + [c * num_area_ranges * num_images + a * num_images + i]); + } + } + } + + return results_all; +} + +// Convert a python list to a vector +template +std::vector list_to_vec(const py::list& l) { + std::vector v(py::len(l)); + for (int i = 0; i < (int)py::len(l); ++i) { + v[i] = l[i].cast(); + } + return v; +} + +// Helper function to Accumulate() +// Considers the evaluation results applicable to a particular category, area +// range, and max_detections parameter setting, which begin at +// evaluations[evaluation_index]. Extracts a sorted list of length n of all +// applicable detection instances concatenated across all images in the dataset, +// which are represented by the outputs evaluation_indices, detection_scores, +// image_detection_indices, and detection_sorted_indices--all of which are +// length n. evaluation_indices[i] stores the applicable index into +// evaluations[] for instance i, which has detection score detection_score[i], +// and is the image_detection_indices[i]'th of the list of detections +// for the image containing i. 
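+// (Illustrative, hypothetical example: if the applicable setting covers two
+// images holding 2 and 1 detections respectively, the concatenated list has
+// n = 3 entries, with evaluation_indices = {e, e, e + 1} for
+// e = evaluation_index, image_detection_indices = {0, 1, 0}, and
+// detection_scores holding the corresponding scores.)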
detection_sorted_indices[] defines a sorted +// permutation of the 3 other outputs +int BuildSortedDetectionList( + const std::vector& evaluations, + const int64_t evaluation_index, + const int64_t num_images, + const int max_detections, + std::vector* evaluation_indices, + std::vector* detection_scores, + std::vector* detection_sorted_indices, + std::vector* image_detection_indices) { + assert(evaluations.size() >= evaluation_index + num_images); + + // Extract a list of object instances of the applicable category, area + // range, and max detections requirements such that they can be sorted + image_detection_indices->clear(); + evaluation_indices->clear(); + detection_scores->clear(); + image_detection_indices->reserve(num_images * max_detections); + evaluation_indices->reserve(num_images * max_detections); + detection_scores->reserve(num_images * max_detections); + int num_valid_ground_truth = 0; + for (auto i = 0; i < num_images; ++i) { + const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; + + for (int d = 0; + d < (int)evaluation.detection_scores.size() && d < max_detections; + ++d) { // detected instances + evaluation_indices->push_back(evaluation_index + i); + image_detection_indices->push_back(d); + detection_scores->push_back(evaluation.detection_scores[d]); + } + for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { + if (!ground_truth_ignore) { + ++num_valid_ground_truth; + } + } + } + + // Sort detections by decreasing score, using stable sort to match + // python implementation + detection_sorted_indices->resize(detection_scores->size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_scores](size_t j1, size_t j2) { + return (*detection_scores)[j1] > (*detection_scores)[j2]; + }); + + return num_valid_ground_truth; +} + +// Helper function to Accumulate() +// Compute a precision recall curve given a sorted list of detected instances +// encoded in evaluations, evaluation_indices, detection_scores, +// detection_sorted_indices, image_detection_indices (see +// BuildSortedDetectionList()). Using vectors precisions and recalls +// and temporary storage, output the results into precisions_out, recalls_out, +// and scores_out, which are large buffers containing many precion/recall curves +// for all possible parameter settings, with precisions_out_index and +// recalls_out_index defining the applicable indices to store results. 
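+//
+// A small worked example with hypothetical numbers: with
+// num_valid_ground_truth = 2 and three score-sorted detections that evaluate
+// to (TP, FP, TP), the running sums below give
+//   precisions = {1/1, 1/2, 2/3} and recalls = {1/2, 1/2, 2/2};
+// the backward pass then makes precision non-increasing along the list,
+// i.e. {1, 2/3, 2/3}, before sampling at each entry of recall_thresholds.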
+void ComputePrecisionRecallCurve( + const int64_t precisions_out_index, + const int64_t precisions_out_stride, + const int64_t recalls_out_index, + const std::vector& recall_thresholds, + const int iou_threshold_index, + const int num_iou_thresholds, + const int num_valid_ground_truth, + const std::vector& evaluations, + const std::vector& evaluation_indices, + const std::vector& detection_scores, + const std::vector& detection_sorted_indices, + const std::vector& image_detection_indices, + std::vector* precisions, + std::vector* recalls, + std::vector* precisions_out, + std::vector* scores_out, + std::vector* recalls_out) { + assert(recalls_out->size() > recalls_out_index); + + // Compute precision/recall for each instance in the sorted list of detections + int64_t true_positives_sum = 0, false_positives_sum = 0; + precisions->clear(); + recalls->clear(); + precisions->reserve(detection_sorted_indices.size()); + recalls->reserve(detection_sorted_indices.size()); + assert(!evaluations.empty() || detection_sorted_indices.empty()); + for (auto detection_sorted_index : detection_sorted_indices) { + const ImageEvaluation& evaluation = + evaluations[evaluation_indices[detection_sorted_index]]; + const auto num_detections = + evaluation.detection_matches.size() / num_iou_thresholds; + const auto detection_index = iou_threshold_index * num_detections + + image_detection_indices[detection_sorted_index]; + assert(evaluation.detection_matches.size() > detection_index); + assert(evaluation.detection_ignores.size() > detection_index); + const int64_t detection_match = + evaluation.detection_matches[detection_index]; + const bool detection_ignores = + evaluation.detection_ignores[detection_index]; + const auto true_positive = detection_match > 0 && !detection_ignores; + const auto false_positive = detection_match == 0 && !detection_ignores; + if (true_positive) { + ++true_positives_sum; + } + if (false_positive) { + ++false_positives_sum; + } + + const double recall = + static_cast(true_positives_sum) / num_valid_ground_truth; + recalls->push_back(recall); + const int64_t num_valid_detections = + true_positives_sum + false_positives_sum; + const double precision = num_valid_detections > 0 + ? static_cast(true_positives_sum) / num_valid_detections + : 0.0; + precisions->push_back(precision); + } + + (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0; + + for (int64_t i = static_cast(precisions->size()) - 1; i > 0; --i) { + if ((*precisions)[i] > (*precisions)[i - 1]) { + (*precisions)[i - 1] = (*precisions)[i]; + } + } + + // Sample the per instance precision/recall list at each recall threshold + for (size_t r = 0; r < recall_thresholds.size(); ++r) { + // first index in recalls >= recall_thresholds[r] + std::vector::iterator low = std::lower_bound( + recalls->begin(), recalls->end(), recall_thresholds[r]); + size_t precisions_index = low - recalls->begin(); + + const auto results_ind = precisions_out_index + r * precisions_out_stride; + assert(results_ind < precisions_out->size()); + assert(results_ind < scores_out->size()); + if (precisions_index < precisions->size()) { + (*precisions_out)[results_ind] = (*precisions)[precisions_index]; + (*scores_out)[results_ind] = + detection_scores[detection_sorted_indices[precisions_index]]; + } else { + (*precisions_out)[results_ind] = 0; + (*scores_out)[results_ind] = 0; + } + } +} +py::dict Accumulate( + const py::object& params, + const std::vector& evaluations) { + const std::vector recall_thresholds = + list_to_vec(params.attr("recThrs")); + const std::vector max_detections = + list_to_vec(params.attr("maxDets")); + const int num_iou_thresholds = py::len(params.attr("iouThrs")); + const int num_recall_thresholds = py::len(params.attr("recThrs")); + const int num_categories = params.attr("useCats").cast() == 1 + ? py::len(params.attr("catIds")) + : 1; + const int num_area_ranges = py::len(params.attr("areaRng")); + const int num_max_detections = py::len(params.attr("maxDets")); + const int num_images = py::len(params.attr("imgIds")); + + std::vector precisions_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + std::vector recalls_out( + num_iou_thresholds * num_categories * num_area_ranges * + num_max_detections, + -1); + std::vector scores_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + + // Consider the list of all detected instances in the entire dataset in one + // large list. evaluation_indices, detection_scores, + // image_detection_indices, and detection_sorted_indices all have the same + // length as this list, such that each entry corresponds to one detected + // instance + std::vector evaluation_indices; // indices into evaluations[] + std::vector detection_scores; // detection scores of each instance + std::vector detection_sorted_indices; // sorted indices of all + // instances in the dataset + std::vector + image_detection_indices; // indices into the list of detected instances in + // the same image as each instance + std::vector precisions, recalls; + + for (auto c = 0; c < num_categories; ++c) { + for (auto a = 0; a < num_area_ranges; ++a) { + for (auto m = 0; m < num_max_detections; ++m) { + // The COCO PythonAPI assumes evaluations[] (the return value of + // COCOeval::EvaluateImages() is one long list storing results for each + // combination of category, area range, and image id, with categories in + // the outermost loop and images in the innermost loop. 
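+          // (Illustrative, hypothetical sizes: with num_area_ranges = 4 and
+          // num_images = 10, the block for category c = 1 and area range
+          // a = 2 starts at evaluations_index = 1 * 4 * 10 + 2 * 10 = 60 and
+          // spans the next num_images = 10 ImageEvaluation entries.)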
+ const int64_t evaluations_index = + c * num_area_ranges * num_images + a * num_images; + int num_valid_ground_truth = BuildSortedDetectionList( + evaluations, + evaluations_index, + num_images, + max_detections[m], + &evaluation_indices, + &detection_scores, + &detection_sorted_indices, + &image_detection_indices); + + if (num_valid_ground_truth == 0) { + continue; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + // recalls_out is a flattened vectors representing a + // num_iou_thresholds X num_categories X num_area_ranges X + // num_max_detections matrix + const int64_t recalls_out_index = + t * num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + // precisions_out and scores_out are flattened vectors + // representing a num_iou_thresholds X num_recall_thresholds X + // num_categories X num_area_ranges X num_max_detections matrix + const int64_t precisions_out_stride = + num_categories * num_area_ranges * num_max_detections; + const int64_t precisions_out_index = t * num_recall_thresholds * + num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + ComputePrecisionRecallCurve( + precisions_out_index, + precisions_out_stride, + recalls_out_index, + recall_thresholds, + t, + num_iou_thresholds, + num_valid_ground_truth, + evaluations, + evaluation_indices, + detection_scores, + detection_sorted_indices, + image_detection_indices, + &precisions, + &recalls, + &precisions_out, + &scores_out, + &recalls_out); + } + } + } + } + + time_t rawtime; + struct tm local_time; + std::array buffer; + time(&rawtime); +#ifdef _WIN32 + localtime_s(&local_time, &rawtime); +#else + localtime_r(&rawtime, &local_time); +#endif + strftime( + buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time); + return py::dict( + "params"_a = params, + "counts"_a = std::vector( + {num_iou_thresholds, + num_recall_thresholds, + num_categories, + num_area_ranges, + num_max_detections}), + "date"_a = buffer, + "precision"_a = precisions_out, + "recall"_a = recalls_out, + "scores"_a = scores_out); +} + +} // namespace COCOeval + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.h b/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.h new file mode 100644 index 0000000000000000000000000000000000000000..db246e49a026b7cd989b305f4d3d98100be3c912 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/cocoeval/cocoeval.h @@ -0,0 +1,88 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once + +#include +#include +#include +#include +#include + +namespace py = pybind11; + +namespace detectron2 { + +namespace COCOeval { + +// Annotation data for a single object instance in an image +struct InstanceAnnotation { + InstanceAnnotation( + uint64_t id, + double score, + double area, + bool is_crowd, + bool ignore) + : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} + uint64_t id; + double score = 0.; + double area = 0.; + bool is_crowd = false; + bool ignore = false; +}; + +// Stores intermediate results for evaluating detection results for a single +// image that has D detected instances and G ground truth instances. 
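+// (Illustrative sizes: with T IOU thresholds, detection_matches and
+// detection_ignores below are flattened T x D arrays indexed as [t * D + d],
+// detection_scores has D entries, and ground_truth_ignores has G entries.)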
This stores +// matches between detected and ground truth instances +struct ImageEvaluation { + // For each of the D detected instances, the id of the matched ground truth + // instance, or 0 if unmatched + std::vector detection_matches; + + // The detection score of each of the D detected instances + std::vector detection_scores; + + // Marks whether or not each of G instances was ignored from evaluation (e.g., + // because it's outside area_range) + std::vector ground_truth_ignores; + + // Marks whether or not each of D instances was ignored from evaluation (e.g., + // because it's outside aRng) + std::vector detection_ignores; +}; + +template +using ImageCategoryInstances = std::vector>>; + +// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each +// combination of image, category, area range settings, and IOU thresholds to +// evaluate, it matches detected instances to ground truth instances and stores +// the results into a vector of ImageEvaluation results, which will be +// interpreted by the COCOeval::Accumulate() function to produce precion-recall +// curves. The parameters of nested vectors have the following semantics: +// image_category_ious[i][c][d][g] is the intersection over union of the d'th +// detected instance and g'th ground truth instance of +// category category_ids[c] in image image_ids[i] +// image_category_ground_truth_instances[i][c] is a vector of ground truth +// instances in image image_ids[i] of category category_ids[c] +// image_category_detection_instances[i][c] is a vector of detected +// instances in image image_ids[i] of category category_ids[c] +std::vector EvaluateImages( + const std::vector>& area_ranges, // vector of 2-tuples + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances); + +// C++ implementation of COCOeval.accumulate(), which generates precision +// recall curves for each set of category, IOU threshold, detection area range, +// and max number of detections parameters. It is assumed that the parameter +// evaluations is the return value of the functon COCOeval::EvaluateImages(), +// which was called with the same parameter settings params +py::dict Accumulate( + const py::object& params, + const std::vector& evalutations); + +} // namespace COCOeval +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/cuda_version.cu b/src/sts/detectron2/layers/csrc/cuda_version.cu new file mode 100644 index 0000000000000000000000000000000000000000..6dfe1b90c1f65c443681813fd3e3386c9faa3360 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/cuda_version.cu @@ -0,0 +1,26 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +#include + +namespace detectron2 { +int get_cudart_version() { +// Not a ROCM platform: Either HIP is not used, or +// it is used, but platform is not ROCM (i.e. it is CUDA) +#if !defined(__HIP_PLATFORM_HCC__) + return CUDART_VERSION; +#else + int version = 0; + +#if HIP_VERSION_MAJOR != 0 + // Create a convention similar to that of CUDA, as assumed by other + // parts of the code. 
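+  // (Illustrative: under this scheme a hypothetical HIP 3.7 build reports
+  // 3 * 100 + 7 = 307, i.e. major * 100 + minor, whereas CUDART_VERSION
+  // encodes CUDA 10.2 as 10020.)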
+ + version = HIP_VERSION_MINOR; + version += (HIP_VERSION_MAJOR * 100); +#else + hipRuntimeGetVersion(&version); +#endif + return version; +#endif +} +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/deformable/deform_conv.h b/src/sts/detectron2/layers/csrc/deformable/deform_conv.h new file mode 100644 index 0000000000000000000000000000000000000000..ec8c6c2fdb0274aefb86523894174f9ca58bbb43 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/deformable/deform_conv.h @@ -0,0 +1,377 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#pragma once +#include + +namespace detectron2 { + +#if defined(WITH_CUDA) || defined(WITH_HIP) +int deform_conv_forward_cuda( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step); + +int deform_conv_backward_input_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step); + +int deform_conv_backward_parameters_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step); + +void modulated_deform_conv_cuda_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias); + +void modulated_deform_conv_cuda_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias); + +#endif + +inline int deform_conv_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_forward_cuda( + input, + weight, + offset, + output, + columns, + ones, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline int deform_conv_backward_input( + at::Tensor 
input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + if (gradOutput.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_backward_input_cuda( + input, + offset, + gradOutput, + gradInput, + gradOffset, + weight, + columns, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline int deform_conv_backward_filter( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step) { + if (gradOutput.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_backward_parameters_cuda( + input, + offset, + gradOutput, + gradWeight, + columns, + ones, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + scale, + im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline void modulated_deform_conv_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return modulated_deform_conv_cuda_forward( + input, + weight, + bias, + ones, + offset, + mask, + output, + columns, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group, + with_bias); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline void modulated_deform_conv_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias) { + if (grad_output.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not 
on GPU!"); + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return modulated_deform_conv_cuda_backward( + input, + weight, + bias, + ones, + offset, + mask, + columns, + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group, + with_bias); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.cu b/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..2072bb856ec40b61c3826cead2fb7bb7c971a089 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda.cu @@ -0,0 +1,1223 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +// modified from +// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp +// Original license: Apache 2.0 + +// modify from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c +// Original license: Apache 2.0 + +#include + +#include "deform_conv.h" + +#include +#include + +namespace detectron2 { + +void deformable_im2col( + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor data_col); + +void deformable_col2im( + const at::Tensor data_col, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_im); + +void deformable_col2im_coord( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_offset); + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor data_col); + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int 
pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_im); + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_offset, + at::Tensor grad_mask); + +void shape_check( + at::Tensor input, + at::Tensor offset, + at::Tensor* gradOutput, + at::Tensor weight, + int kH, + int kW, + int dH, + int dW, + int padH, + int padW, + int dilationH, + int dilationW, + int group, + int deformable_group) { + TORCH_CHECK( + weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK( + kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", + kH, + kW); + + TORCH_CHECK( + (weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", + kH, + kW, + weight.size(2), + weight.size(3)); + + TORCH_CHECK( + dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", + dH, + dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, + dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK( + ndim == 3 || ndim == 4, + "3D or 4D input tensor expected but got: %s", + ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK( + nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, + inputHeight, + inputWidth, + nOutputPlane, + outputHeight, + outputWidth); + + TORCH_CHECK( + input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, + input.size(1)); + + TORCH_CHECK( + (inputHeight + 2 * padH >= kH && inputWidth + 2 * padW >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK( + (offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, + outputWidth, + offset.size(2), + offset.size(3)); + + TORCH_CHECK( + (offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK( + gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, + gradOutput->size(dimf)); + + TORCH_CHECK( + (gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, + outputWidth, + gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +int deform_conv_forward_cuda( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + shape_check( + input, + offset, + NULL, + weight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view( + {batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view( + {batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + offset = offset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + at::Tensor output_buffer = at::zeros( + {batchSize / im2col_step, + nOutputPlane, + im2col_step * outputHeight, + outputWidth}, + output.options()); + + output_buffer = output_buffer.view( + {output_buffer.size(0), + group, + 
output_buffer.size(1) / group, + output_buffer.size(2), + output_buffer.size(3)}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col( + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + } + + output_buffer = output_buffer.view( + {output_buffer.size(0), + output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), + output_buffer.size(4)}); + + output_buffer = output_buffer.view( + {batchSize / im2col_step, + nOutputPlane, + im2col_step, + outputHeight, + outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_input_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + shape_check( + input, + offset, + &gradOutput, + weight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view( + {batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view( + {batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + input = input.view( + 
{batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + gradOffset = gradOffset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + offset = offset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), + group, + gradOutput.size(1) / group, + gradOutput.size(2), + gradOutput.size(3), + gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_( + weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), + 0.0f, + 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), + gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), + gradOutput.size(4), + gradOutput.size(5)}); + + deformable_col2im_coord( + columns, + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + gradOffset[elt]); + + deformable_col2im( + columns, + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_parameters_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + shape_check( + input, + offset, + &gradOutput, + gradWeight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), 
gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view( + {batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + gradOutput.transpose_(1, 2); + + at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = gradOutputBuffer.view( + {batchSize / im2col_step, + nOutputPlane, + im2col_step, + outputHeight, + outputWidth}); + gradOutputBuffer.copy_(gradOutput); + // gradOutput is not contiguous, so we do reshape (instead of view) next + gradOutputBuffer = gradOutputBuffer.reshape( + {batchSize / im2col_step, + nOutputPlane, + im2col_step * outputHeight, + outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view( + {batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + offset = offset.view( + {batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col( + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + group, + gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), + gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = gradWeight.view( + {group, + gradWeight.size(0) / group, + gradWeight.size(1), + gradWeight.size(2), + gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_( + gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), + 1.0, + scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), + gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view( + {gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), + gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } + + return 1; +} + +void modulated_deform_conv_cuda_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const 
int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias) { + shape_check( + input, + offset, + NULL, + weight, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group); + + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR( + "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, + kernel_w, + kernel_h_, + kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR( + "Input shape and kernel channels wont match: (%d vs %d).", + channels, + channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // mask shape check + TORCH_CHECK( + (mask.size(2) == height_out && mask.size(3) == width_out), + "invalid spatial size of mask, expected height: %d width: %d, but " + "got height: %d width: %d", + height_out, + width_out, + mask.size(2), + mask.size(3)); + + TORCH_CHECK( + (mask.size(1) == deformable_group * kernel_h * kernel_w), + "invalid number of channels of mask"); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... 
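The `height_out` / `width_out` values computed just above follow the standard convolution output-size rule, and the same expression recurs in every forward and backward routine of this file; the mask shape check only passes when the mask was built for exactly this output size. A minimal standalone sketch of the rule, assuming nothing beyond the formula itself (`conv_output_size` is an illustrative helper, not a detectron2 symbol):

```cpp
// Sketch of the output-size rule used throughout these deformable-conv
// routines: out = (in + 2*pad - (dilation*(kernel-1) + 1)) / stride + 1.
// conv_output_size is an illustrative name, not part of detectron2.
#include <cstdio>

int conv_output_size(int in, int kernel, int pad, int stride, int dilation) {
  // Dilation widens the kernel's effective footprint before the usual
  // "how many stride steps fit" division.
  const int effective_kernel = dilation * (kernel - 1) + 1;
  return (in + 2 * pad - effective_kernel) / stride + 1;
}

int main() {
  // A 64x64 map with a 3x3 kernel, pad 1, stride 1, dilation 1 keeps its
  // size (prints 64); stride 2 halves it with rounding (prints 32).
  std::printf("%d\n", conv_output_size(64, 3, 1, 1, 1));
  std::printf("%d\n", conv_output_size(64, 3, 1, 2, 1));
  return 0;
}
```

Because the division is integer division, a stride that does not evenly divide the padded extent simply drops the trailing positions rather than producing a partial output column.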
+ ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = at::zeros( + {channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view( + {output.size(0), + group, + output.size(1) / group, + output.size(2), + output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + columns); + + // divide into group + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view( + {weight.size(0) * weight.size(1), + weight.size(2), + weight.size(3), + weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view( + {output.size(0), + output.size(1) * output.size(2), + output.size(3), + output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void modulated_deform_conv_cuda_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias) { + shape_check( + input, + offset, + &grad_output, + weight, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group); + + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR( + "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, + kernel_w, + kernel_h_, + kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR( + "Input shape and kernel channels wont match: (%d vs %d).", + channels, + channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + // mask shape check + TORCH_CHECK( + (mask.size(2) == height_out && mask.size(3) == width_out), + "invalid spatial size of mask, expected height: %d width: %d, but " + "got height: %d width: %d", + height_out, + width_out, + mask.size(2), + mask.size(3)); + + TORCH_CHECK( + (mask.size(1) == deformable_group * kernel_h * kernel_w), + "invalid number of channels of mask"); + + if 
(ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros( + {channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = grad_output.view( + {grad_output.size(0), + group, + grad_output.size(1) / group, + grad_output.size(2), + grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view( + {group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_( + weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), + 0.0f, + 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view( + {weight.size(0) * weight.size(1), + weight.size(2), + weight.size(3), + weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + grad_input[b]); + + // gradient w.r.t. 
weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view( + {group, + grad_weight.size(0) / group, + grad_weight.size(1), + grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view( + {grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), + grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view( + {grad_output.size(0) * grad_output.size(1), + grad_output.size(2), + grad_output.size(3), + grad_output.size(4)}); +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu b/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..f299c7add116685e9c87a187a85ea63f9f808867 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu @@ -0,0 +1,1288 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +// modified from +// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu +// Original license: Apache 2.0 +// clang-format off + +// modify from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu + +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer ***************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
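The CUDA kernels in the file that begins here all reduce to one primitive: sampling a feature map at a fractional, offset-shifted location by bilinear interpolation, with corners that fall outside the image contributing zero (`deformable_im2col_bilinear` and `dmcn_im2col_bilinear` below). A self-contained CPU sketch of that sampling rule, under the assumption of a row-major single-channel map (`bilinear_sample` is an illustrative name, not a symbol from this file):

```cpp
// CPU reference for the bilinear sampling rule used by
// deformable_im2col_bilinear / dmcn_im2col_bilinear; out-of-range corners
// read as zero, matching the kernels' boundary handling.
#include <cmath>
#include <cstdio>
#include <vector>

float bilinear_sample(
    const std::vector<float>& img, int height, int width, float h, float w) {
  const int h_low = static_cast<int>(std::floor(h));
  const int w_low = static_cast<int>(std::floor(w));
  const int h_high = h_low + 1;
  const int w_high = w_low + 1;

  const float lh = h - h_low, lw = w - w_low; // fractional parts
  const float hh = 1.f - lh, hw = 1.f - lw;

  // Corners outside the map contribute zero, like the per-corner bounds
  // checks in the device code.
  auto at = [&](int y, int x) -> float {
    return (y >= 0 && y < height && x >= 0 && x < width) ? img[y * width + x]
                                                         : 0.f;
  };

  return hh * hw * at(h_low, w_low) + hh * lw * at(h_low, w_high) +
      lh * hw * at(h_high, w_low) + lh * lw * at(h_high, w_high);
}

int main() {
  // 2x2 map [[0,1],[2,3]]; sampling at (0.5, 0.5) averages all four values
  // and prints 1.50.
  std::vector<float> img = {0.f, 1.f, 2.f, 3.f};
  std::printf("%.2f\n", bilinear_sample(img, 2, 2, 0.5f, 0.5f));
  return 0;
}
```

The backward kernels differentiate exactly this expression: `get_gradient_weight` returns the corner weights (gradient with respect to the image) and `get_coordinate_weight` their derivative with respect to the sampling coordinates (gradient with respect to the predicted offsets).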
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer ********************* + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. + * \ref: https://arxiv.org/abs/1703.06211 + * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng + */ + +#include +#include +#include +#include +#include +#include + +using namespace at; + +#define CUDA_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + + +namespace { + +const int CUDA_NUM_THREADS = 1024; +const int kMaxGridNum = 65535; + +inline int GET_BLOCKS(const int N) { + return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); +} + +} + +template +__device__ scalar_t deformable_im2col_bilinear( + const scalar_t* bottom_data, + const int data_width, + const int height, + const int width, + scalar_t h, + scalar_t w) { + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t get_gradient_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int h, + const int w, + const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == 
argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t get_coordinate_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int height, + const int width, + const scalar_t* im_data, + const int data_width, + const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void deformable_im2col_gpu_kernel( + const int n, + const scalar_t* data_im, + const scalar_t* data_offset, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int num_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* data_col) { + CUDA_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + scalar_t* data_col_ptr = data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + // const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * + // height + h_in) * width + w_in; + const scalar_t* data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t* data_offset_ptr = data_offset + + 
(b_col * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { + // const scalar_t map_h = i * dilation_h + offset_h; + // const scalar_t map_w = j * dilation_w + offset_w; + // const int cur_height = height - h_in; + // const int cur_width = width - w_in; + // val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, + // cur_width, map_h, map_w); + val = deformable_im2col_bilinear( + data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val; + data_col_ptr += batch_size * height_col * width_col; + } + } + } +} + + +template +__global__ void deformable_col2im_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_offset, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_im) { + CUDA_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index]; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = get_gradient_weight( + cur_inv_h_data, + cur_inv_w_data, + 
cur_h + dy, + cur_w + dx, + height, + width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + + +template +__global__ void deformable_col2im_coord_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_im, + const scalar_t* data_offset, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int offset_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_offset) { + CUDA_KERNEL_LOOP(index, n) { + scalar_t val = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t* data_col_ptr = data_col + + deformable_group_index * channel_per_deformable_group * batch_size * + width_col * height_col; + const scalar_t* data_im_ptr = data_im + + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { + inv_h = inv_w = -2; + } + const scalar_t weight = get_coordinate_weight( + inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * height * width, + width, + bp_dir); + val += weight * data_col_ptr[col_pos]; + cnt += 1; + } + + grad_offset[index] = val; + } +} + + +namespace detectron2 { + +void deformable_im2col( + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor data_col) { + // num_axes should be smaller than block size + // todo: check parallel_imgs is correctly passed in + 
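The launcher body that follows assigns one logical thread to every element of the column buffer (`num_kernels = channels * height_col * width_col * parallel_imgs`), asks `GET_BLOCKS` for a grid capped at `kMaxGridNum`, and relies on the grid-stride `CUDA_KERNEL_LOOP` defined near the top of this file to cover any indices beyond the cap. A plain-C++ sketch of that launch arithmetic, with illustrative helper names (`ceil_div`, `grid_stride_visit`):

```cpp
// Host-side sketch of the launch arithmetic used by the im2col/col2im
// launchers: a capped grid plus a grid-stride loop still visits every
// logical index exactly once. Helper names here are illustrative only.
#include <algorithm>
#include <cstdio>

constexpr int kThreadsPerBlock = 1024; // CUDA_NUM_THREADS in this file
constexpr int kMaxGridNum = 65535;

int ceil_div(int n, int d) { return (n + d - 1) / d; }

// Mirrors GET_BLOCKS: enough blocks for n work items, but never more than
// the grid cap.
int get_blocks(int n) {
  return std::min(kMaxGridNum, ceil_div(n, kThreadsPerBlock));
}

// Serial stand-in for CUDA_KERNEL_LOOP: each "thread" starts at its global
// id and advances by the total thread count until all n indices are done.
long long grid_stride_visit(int n) {
  const int total_threads = get_blocks(n) * kThreadsPerBlock;
  long long visited = 0;
  for (int tid = 0; tid < total_threads; ++tid)
    for (int i = tid; i < n; i += total_threads)
      ++visited;
  return visited;
}

int main() {
  // e.g. 64 channels, a 56x56 output map, im2col_step of 32 images.
  const int num_kernels = 64 * 56 * 56 * 32;
  std::printf("blocks = %d\n", get_blocks(num_kernels));      // 6272
  std::printf("capped blocks = %d\n", get_blocks(200000000)); // 65535
  std::printf("visited %lld of %d indices\n",
              grid_stride_visit(num_kernels), num_kernels);
  return 0;
}
```

Capping the grid keeps every launch within the conservative 65535 limit assumed by `kMaxGridNum`; correctness then rests entirely on the stride loop, which is why each kernel in this file is written inside `CUDA_KERNEL_LOOP` rather than assuming one thread per index.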
int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + at::cuda::CUDAGuard device_guard(data_im.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "deformable_im2col_gpu", ([&] { + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* data_col_ = data_col.data_ptr(); + + deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_im_, + data_offset_, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + channels, + deformable_group, + height_col, + width_col, + data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); + } +} + + +void deformable_col2im( + const at::Tensor data_col, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_im) { + // todo: make sure parallel_imgs is passed in correctly + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = + channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* grad_im_ = grad_im.data_ptr(); + + deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_offset_, + channels, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + deformable_group, + height_col, + width_col, + grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); + } +} + + +void deformable_col2im_coord( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_offset) { + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int 
num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * + deformable_group * parallel_imgs; + int channel_per_deformable_group = + channels * ksize_h * ksize_w / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* grad_offset_ = grad_offset.data_ptr(); + + deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_im_, + data_offset_, + channels, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + 2 * ksize_h * ksize_w * deformable_group, + deformable_group, + height_col, + width_col, + grad_offset_); + })); +} + +} // namespace detectron2 + + +template +__device__ scalar_t dmcn_im2col_bilinear( + const scalar_t* bottom_data, + const int data_width, + const int height, + const int width, + scalar_t h, + scalar_t w) { + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t dmcn_get_gradient_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int h, + const int w, + const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t dmcn_get_coordinate_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int height, + const int width, + const scalar_t* im_data, + const int data_width, + const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low 
+ 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void modulated_deformable_im2col_gpu_kernel( + const int n, + const scalar_t* data_im, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int num_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* data_col) { + CUDA_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + + scalar_t* data_col_ptr = data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + // const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * + // height + h_in) * width + w_in; + const scalar_t* data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + const scalar_t* data_mask_ptr = data_mask + + (b_col * deformable_group + deformable_group_index) * kernel_h * + kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t val = 
static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + // if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { + // const float map_h = i * dilation_h + offset_h; + // const float map_w = j * dilation_w + offset_w; + // const int cur_height = height - h_in; + // const int cur_width = width - w_in; + // val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, + // cur_width, map_h, map_w); + val = dmcn_im2col_bilinear( + data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val * mask; + data_col_ptr += batch_size * height_col * width_col; + // data_col_ptr += height_col * width_col; + } + } + } +} + +template +__global__ void modulated_deformable_col2im_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_im) { + CUDA_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const scalar_t* data_mask_ptr = data_mask + + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * + height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index] * mask; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = dmcn_get_gradient_weight( + cur_inv_h_data, + cur_inv_w_data, + cur_h + dy, + cur_w + dx, + height, + width); + atomicAdd(grad_im + cur_bottom_grad_pos, 
weight * cur_top_grad); + } + } + } + } +} + +template +__global__ void modulated_deformable_col2im_coord_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_im, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int offset_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_offset, + scalar_t* grad_mask) { + CUDA_KERNEL_LOOP(index, n) { + scalar_t val = 0, mval = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t* data_col_ptr = data_col + + deformable_group_index * channel_per_deformable_group * batch_size * + width_col * height_col; + const scalar_t* data_im_ptr = data_im + + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const scalar_t* data_mask_ptr = data_mask + + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * + height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const int data_mask_hw_ptr = + (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { + inv_h = inv_w = -2; + } else { + mval += data_col_ptr[col_pos] * + dmcn_im2col_bilinear( + data_im_ptr + cnt * height * width, + width, + height, + width, + inv_h, + inv_w); + } + const scalar_t weight = dmcn_get_coordinate_weight( + inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * height * width, + width, + bp_dir); + val += weight * data_col_ptr[col_pos] * mask; + cnt += 1; + } + // KERNEL_ASSIGN(grad_offset[index], offset_req, val); + grad_offset[index] = val; + if (offset_c % 2 == 0) + // 
KERNEL_ASSIGN(grad_mask[(((b * deformable_group + + // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * + // height_col + h) * width_col + w], mask_req, mval); + grad_mask + [(((b * deformable_group + deformable_group_index) * kernel_h * + kernel_w + + offset_c / 2) * + height_col + + h) * + width_col + + w] = mval; + } +} + + +namespace detectron2 { + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor data_col) { + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + at::cuda::CUDAGuard device_guard(data_im.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* data_col_ = data_col.data_ptr(); + + modulated_deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_im_, + data_offset_, + data_mask_, + height_im, + width_im, + kernel_h, + kenerl_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + channels, + deformable_group, + height_col, + width_col, + data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_im2col_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_im) { + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = + channels * kernel_h * kernel_w * batch_size * height_col * width_col; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* grad_im_ = grad_im.data_ptr(); + + modulated_deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_offset_, + data_mask_, + channels, + height_im, + width_im, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + deformable_group, + height_col, + 
width_col, + grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_col2im_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_offset, + at::Tensor grad_mask) { + const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * + kernel_w * deformable_group; + const int channel_per_deformable_group = + channels * kernel_h * kernel_w / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* grad_offset_ = grad_offset.data_ptr(); + scalar_t* grad_mask_ = grad_mask.data_ptr(); + + modulated_deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_im_, + data_offset_, + data_mask_, + channels, + height_im, + width_im, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + 2 * kernel_h * kernel_w * deformable_group, + deformable_group, + height_col, + width_col, + grad_offset_, + grad_mask_); + })); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_col2im_coord_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated.h new file mode 100644 index 0000000000000000000000000000000000000000..bd855e832afea4354885f5d8bfe94e204f51827e --- /dev/null +++ b/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated.h @@ -0,0 +1,39 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
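The `nms_rotated.h` header that begins here declares the CPU and CUDA entry points together with the `nms_rotated` dispatcher, and both backends implement the same greedy suppression scheme: walk the boxes in descending score order, keep each box that has not been suppressed, and suppress every later box whose rotated IoU with it crosses the threshold; only the IoU primitive (`single_box_iou_rotated`) is specific to rotated boxes. A minimal CPU-only sketch of that scheme, with the IoU supplied as a callback and all names illustrative rather than detectron2 symbols:

```cpp
// Greedy NMS sketch mirroring the structure of nms_rotated_cpu_kernel; the
// iou callback stands in for single_box_iou_rotated.
#include <algorithm>
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

std::vector<int> greedy_nms(
    const std::vector<float>& scores,
    const std::function<float(int, int)>& iou, // IoU of boxes i and j
    float iou_threshold) {
  const int n = static_cast<int>(scores.size());
  std::vector<int> order(n);
  std::iota(order.begin(), order.end(), 0);
  // Highest score first, as with scores.sort(0, /*descending=*/true).
  std::sort(order.begin(), order.end(),
            [&](int a, int b) { return scores[a] > scores[b]; });

  std::vector<char> suppressed(n, 0);
  std::vector<int> keep;
  for (int oi = 0; oi < n; ++oi) {
    const int i = order[oi];
    if (suppressed[i]) continue;
    keep.push_back(i);
    for (int oj = oi + 1; oj < n; ++oj) {
      const int j = order[oj];
      if (!suppressed[j] && iou(i, j) >= iou_threshold) suppressed[j] = 1;
    }
  }
  return keep;
}

int main() {
  // Boxes 0 and 1 overlap heavily and box 2 is disjoint; box 1 wins on
  // score, so the kept indices printed are "1 2".
  std::vector<float> scores = {0.8f, 0.9f, 0.7f};
  auto iou = [](int i, int j) { return (i + j == 1) ? 0.75f : 0.0f; };
  for (int k : greedy_nms(scores, iou, 0.5f)) std::printf("%d ", k);
  std::printf("\n");
  return 0;
}
```

The CUDA variant further down keeps the same ordering but evaluates the pairwise IoUs in 64-box tiles, packing each tile's suppression decisions into one `unsigned long long` bitmask that the host then reduces into the final keep list.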
+#pragma once
+#include <torch/types.h>
+
+namespace detectron2 {
+
+at::Tensor nms_rotated_cpu(
+    const at::Tensor& dets,
+    const at::Tensor& scores,
+    const double iou_threshold);
+
+#if defined(WITH_CUDA) || defined(WITH_HIP)
+at::Tensor nms_rotated_cuda(
+    const at::Tensor& dets,
+    const at::Tensor& scores,
+    const double iou_threshold);
+#endif
+
+// Interface for Python
+// inline is needed to prevent multiple function definitions when this header is
+// included by different cpps
+inline at::Tensor nms_rotated(
+    const at::Tensor& dets,
+    const at::Tensor& scores,
+    const double iou_threshold) {
+  assert(dets.device().is_cuda() == scores.device().is_cuda());
+  if (dets.device().is_cuda()) {
+#if defined(WITH_CUDA) || defined(WITH_HIP)
+    return nms_rotated_cuda(
+        dets.contiguous(), scores.contiguous(), iou_threshold);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  }
+
+  return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold);
+}
+
+} // namespace detectron2
diff --git a/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d7556e645b604aa83d86cc702b783fd8ecedffcc
--- /dev/null
+++ b/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
@@ -0,0 +1,75 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+#include "../box_iou_rotated/box_iou_rotated_utils.h"
+#include "nms_rotated.h"
+
+namespace detectron2 {
+
+template <typename scalar_t>
+at::Tensor nms_rotated_cpu_kernel(
+    const at::Tensor& dets,
+    const at::Tensor& scores,
+    const double iou_threshold) {
+  // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
+  // however, the code in this function is much shorter because
+  // we delegate the IoU computation for rotated boxes to
+  // the single_box_iou_rotated function in box_iou_rotated_utils.h
+  AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor");
+  AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor");
+  AT_ASSERTM(
+      dets.scalar_type() == scores.scalar_type(),
+      "dets should have the same type as scores");
+
+  if (dets.numel() == 0) {
+    return at::empty({0}, dets.options().dtype(at::kLong));
+  }
+
+  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
+
+  auto ndets = dets.size(0);
+  at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte));
+  at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong));
+
+  auto suppressed = suppressed_t.data_ptr<uint8_t>();
+  auto keep = keep_t.data_ptr<int64_t>();
+  auto order = order_t.data_ptr<int64_t>();
+
+  int64_t num_to_keep = 0;
+
+  for (int64_t _i = 0; _i < ndets; _i++) {
+    auto i = order[_i];
+    if (suppressed[i] == 1) {
+      continue;
+    }
+
+    keep[num_to_keep++] = i;
+
+    for (int64_t _j = _i + 1; _j < ndets; _j++) {
+      auto j = order[_j];
+      if (suppressed[j] == 1) {
+        continue;
+      }
+
+      auto ovr = single_box_iou_rotated<scalar_t>(
+          dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>());
+      if (ovr >= iou_threshold) {
+        suppressed[j] = 1;
+      }
+    }
+  }
+  return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
+}
+
+at::Tensor nms_rotated_cpu(
+    // input must be contiguous
+    const at::Tensor& dets,
+    const at::Tensor& scores,
+    const double iou_threshold) {
+  auto result = at::empty({0}, dets.options());
+
+  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] {
+    result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
+  });
+  return result;
+}
+
+} // namespace detectron2
diff --git
a/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..2a3db5c62e7a2da52ccf5bac980653c943d630fd --- /dev/null +++ b/src/sts/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu @@ -0,0 +1,145 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +#include +#include +#include +#include +#ifdef WITH_CUDA +#include "../box_iou_rotated/box_iou_rotated_utils.h" +#endif +// TODO avoid this when pytorch supports "same directory" hipification +#ifdef WITH_HIP +#include "box_iou_rotated/box_iou_rotated_utils.h" +#endif + +using namespace detectron2; + +namespace { +int const threadsPerBlock = sizeof(unsigned long long) * 8; +} + +template +__global__ void nms_rotated_cuda_kernel( + const int n_boxes, + const double iou_threshold, + const T* dev_boxes, + unsigned long long* dev_mask) { + // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + // Compared to nms_cuda_kernel, where each box is represented with 4 values + // (x1, y1, x2, y2), each rotated box is represented with 5 values + // (x_center, y_center, width, height, angle_degrees) here. + __shared__ T block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const T* cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + // Instead of devIoU used by original horizontal nms, here + // we use the single_box_iou_rotated function from box_iou_rotated_utils.h + if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > + iou_threshold) { + t |= 1ULL << i; + } + } + const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +namespace detectron2 { + +at::Tensor nms_rotated_cuda( + // input must be contiguous + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold) { + // using scalar_t = float; + AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); + AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(dets.device()); + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + auto dets_sorted = dets.index_select(0, order_t); + + auto dets_num = dets.size(0); + + const int col_blocks = + at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); + + at::Tensor mask = + at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); + + dim3 
blocks(col_blocks, col_blocks); + dim3 threads(threadsPerBlock); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES( + dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { + nms_rotated_cuda_kernel<<>>( + dets_num, + iou_threshold, + dets_sorted.data_ptr(), + (unsigned long long*)mask.data_ptr()); + }); + + at::Tensor mask_cpu = mask.to(at::kCPU); + unsigned long long* mask_host = + (unsigned long long*)mask_cpu.data_ptr(); + + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + at::Tensor keep = + at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); + int64_t* keep_out = keep.data_ptr(); + + int num_to_keep = 0; + for (int i = 0; i < dets_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long* p = mask_host + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + + AT_CUDA_CHECK(cudaGetLastError()); + return order_t.index( + {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) + .to(order_t.device(), keep.scalar_type())}); +} + +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/csrc/vision.cpp b/src/sts/detectron2/layers/csrc/vision.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f6c049f7b4970b5ab88bf4bea5c5cf95897da0f7 --- /dev/null +++ b/src/sts/detectron2/layers/csrc/vision.cpp @@ -0,0 +1,129 @@ +// Copyright (c) Facebook, Inc. and its affiliates. + +#include +#include "ROIAlignRotated/ROIAlignRotated.h" +#include "box_iou_rotated/box_iou_rotated.h" +#include "cocoeval/cocoeval.h" +#include "deformable/deform_conv.h" +#include "nms_rotated/nms_rotated.h" + +namespace detectron2 { + +#if defined(WITH_CUDA) || defined(WITH_HIP) +extern int get_cudart_version(); +#endif + +std::string get_cuda_version() { +#if defined(WITH_CUDA) || defined(WITH_HIP) + std::ostringstream oss; + +#if defined(WITH_CUDA) + oss << "CUDA "; +#else + oss << "HIP "; +#endif + + // copied from + // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 + auto printCudaStyleVersion = [&](int v) { + oss << (v / 1000) << "." << (v / 10 % 100); + if (v % 10 != 0) { + oss << "." << (v % 10); + } + }; + printCudaStyleVersion(get_cudart_version()); + return oss.str(); +#else // neither CUDA nor HIP + return std::string("not available"); +#endif +} + +bool has_cuda() { +#if defined(WITH_CUDA) + return true; +#else + return false; +#endif +} + +// similar to +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp +std::string get_compiler_version() { + std::ostringstream ss; +#if defined(__GNUC__) +#ifndef __clang__ + +#if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) +#error "GCC >= 4.9 is required!" +#endif + + { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } +#endif +#endif + +#if defined(__clang_major__) + { + ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
+ << __clang_patchlevel__; + } +#endif + +#if defined(_MSC_VER) + { ss << "MSVC " << _MSC_FULL_VER; } +#endif + return ss.str(); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); + m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); + m.def("has_cuda", &has_cuda, "has_cuda"); + + m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); + + m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); + m.def( + "deform_conv_backward_input", + &deform_conv_backward_input, + "deform_conv_backward_input"); + m.def( + "deform_conv_backward_filter", + &deform_conv_backward_filter, + "deform_conv_backward_filter"); + m.def( + "modulated_deform_conv_forward", + &modulated_deform_conv_forward, + "modulated_deform_conv_forward"); + m.def( + "modulated_deform_conv_backward", + &modulated_deform_conv_backward, + "modulated_deform_conv_backward"); + + m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); + + m.def( + "roi_align_rotated_forward", + &ROIAlignRotated_forward, + "Forward pass for Rotated ROI-Align Operator"); + m.def( + "roi_align_rotated_backward", + &ROIAlignRotated_backward, + "Backward pass for Rotated ROI-Align Operator"); + + m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); + m.def( + "COCOevalEvaluateImages", + &COCOeval::EvaluateImages, + "COCOeval::EvaluateImages"); + pybind11::class_(m, "InstanceAnnotation") + .def(pybind11::init()); + pybind11::class_(m, "ImageEvaluation") + .def(pybind11::init<>()); +} + +#ifdef TORCH_LIBRARY +TORCH_LIBRARY(detectron2, m) { + m.def("nms_rotated", &nms_rotated); +} +#endif +} // namespace detectron2 diff --git a/src/sts/detectron2/layers/deform_conv.py b/src/sts/detectron2/layers/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..eca070f59645af4c9ccd003d99678f19538f355d --- /dev/null +++ b/src/sts/detectron2/layers/deform_conv.py @@ -0,0 +1,501 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from functools import lru_cache +import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair +from torchvision.ops import deform_conv2d + +from detectron2 import _C + +from .wrappers import _NewEmptyTensorOp + + +class _DeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64, + ): + if input is not None and input.dim() != 4: + raise ValueError( + "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()) + ) + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride) + ) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + if deformable_groups != 1: + raise NotImplementedError( + "Deformable Conv with deformable_groups != 1 is not supported on CPUs!" 
+ ) + return deform_conv2d( + input, offset, weight, stride=stride, padding=padding, dilation=dilation + ) + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + _C.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + cur_im2col_step, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + _C.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + cur_im2col_step, + ) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + _C.deform_conv_backward_filter( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + 1, + cur_im2col_step, + ) + + return grad_input, grad_offset, grad_weight, None, None, None, None, None, None + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + "convolution input is too small (output would be {})".format( + "x".join(map(str, output_size)) + ) + ) + return output_size + + @staticmethod + @lru_cache(maxsize=128) + def _cal_im2col_step(input_size, default_size): + """ + Calculate proper im2col step size, which should be divisible by input_size and not larger + than prefer_size. Meanwhile the step size should be as large as possible to be more + efficient. So we choose the largest one among all divisors of input_size which are smaller + than prefer_size. + :param input_size: input batch size . + :param default_size: default preferred im2col step size. + :return: the largest proper step size. 
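As a quick illustration of the `_cal_im2col_step` contract just described (the step must divide the batch size, must not exceed the preferred size, and should be as large as possible), here is a brute-force restatement. It is only for intuition; the helper below implements the same idea with an early-exit divisor search.

```python
# Brute-force restatement of the _cal_im2col_step contract described above.
# For illustration only; not how the actual helper searches for the step.
def largest_valid_im2col_step(input_size: int, default_size: int) -> int:
    return max(d for d in range(1, default_size + 1) if input_size % d == 0)

assert largest_valid_im2col_step(64, 64) == 64   # the whole batch fits in one step
assert largest_valid_im2col_step(96, 64) == 48   # 96 = 48 * 2
assert largest_valid_im2col_step(7, 64) == 7     # small batches pass through unchanged
```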
+ """ + if input_size <= default_size: + return input_size + best_step = 1 + for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)): + if input_size % step == 0: + if input_size // step <= default_size: + return input_size // step + best_step = step + + return best_step + + +class _ModulatedDeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + ): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + if ( + weight.requires_grad + or mask.requires_grad + or offset.requires_grad + or input.requires_grad + ): + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + _C.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + _C.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + if not ctx.with_bias: + grad_bias = None + + return ( + grad_input, + grad_offset, + grad_mask, + grad_weight, + grad_bias, + None, + None, + None, + None, + None, + ) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = ( + height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1) + ) // ctx.stride + 1 + width_out = ( + width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1) + ) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = _DeformConv.apply +modulated_deform_conv = _ModulatedDeformConv.apply + + +class DeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False, + norm=None, + activation=None, + ): + """ + Deformable convolution from :paper:`deformconv`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. 
+ norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, "in_channels {} cannot be divisible by groups {}".format( + in_channels, groups + ) + assert ( + out_channels % groups == 0 + ), "out_channels {} cannot be divisible by groups {}".format(out_channels, groups) + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size) + ) + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + + def forward(self, x, offset): + if x.numel() == 0: + # When input is empty, we want to return a empty tensor with "correct" shape, + # So that the following operations will not panic + # if they check for the shape of the tensor. + # This computes the height and width of the output tensor + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = deform_conv( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=False" + return tmpstr + + +class ModulatedDeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True, + norm=None, + activation=None, + ): + """ + Modulated deformable convolution from :paper:`deformconv2`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. 
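A minimal usage sketch for the `DeformConv` module defined above. It assumes the compiled `detectron2._C` extension and a CUDA device are available, and that the offset tensor follows the usual deformable-convolution layout of `2 * deformable_groups * kH * kW` channels (one `(dy, dx)` pair per kernel tap); the `offset_conv` layer is only an illustrative way to produce those offsets.

```python
# Hedged usage sketch: DeformConv consumes an externally predicted offset field.
import torch
from torch import nn
from detectron2.layers.deform_conv import DeformConv

deformable_groups, k = 1, 3
offset_conv = nn.Conv2d(64, 2 * deformable_groups * k * k, kernel_size=3, padding=1).cuda()
deform = DeformConv(64, 64, kernel_size=3, padding=1,
                    deformable_groups=deformable_groups).cuda()

x = torch.randn(2, 64, 32, 32, device="cuda")
offset = offset_conv(x)    # (2, 18, 32, 32): one (dy, dx) pair per 3x3 kernel tap
y = deform(x, offset)      # (2, 64, 32, 32)
```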
+ norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) + ) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + if self.bias is not None: + nn.init.constant_(self.bias, 0) + + def forward(self, x, offset, mask): + if x.numel() == 0: + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = modulated_deform_conv( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=" + str(self.with_bias) + return tmpstr diff --git a/src/sts/detectron2/layers/mask_ops.py b/src/sts/detectron2/layers/mask_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..c698a03c4d3faf30c08da97169f010b64c0d1058 --- /dev/null +++ b/src/sts/detectron2/layers/mask_ops.py @@ -0,0 +1,260 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Tuple +import torch +from PIL import Image +from torch.nn import functional as F + +from detectron2.structures import Boxes + +__all__ = ["paste_masks_in_image"] + + +BYTES_PER_FLOAT = 4 +# TODO: This memory limit may be too much or too little. It would be better to +# determine it based on available resources. +GPU_MEM_LIMIT = 1024 ** 3 # 1 GB memory limit + + +def _do_paste_mask(masks, boxes, img_h: int, img_w: int, skip_empty: bool = True): + """ + Args: + masks: N, 1, H, W + boxes: N, 4 + img_h, img_w (int): + skip_empty (bool): only paste masks within the region that + tightly bound all boxes, and returns the results this region only. + An important optimization for CPU. + + Returns: + if skip_empty == False, a mask of shape (N, img_h, img_w) + if skip_empty == True, a mask of shape (N, h', w'), and the slice + object for the corresponding region. + """ + # On GPU, paste all masks together (up to chunk size) + # by using the entire image to sample the masks + # Compared to pasting them one by one, + # this has more operations but is faster on COCO-scale dataset. 
+ device = masks.device + + if skip_empty and not torch.jit.is_scripting(): + x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to( + dtype=torch.int32 + ) + x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) + y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) + else: + x0_int, y0_int = 0, 0 + x1_int, y1_int = img_w, img_h + x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 + + N = masks.shape[0] + + img_y = torch.arange(y0_int, y1_int, device=device, dtype=torch.float32) + 0.5 + img_x = torch.arange(x0_int, x1_int, device=device, dtype=torch.float32) + 0.5 + img_y = (img_y - y0) / (y1 - y0) * 2 - 1 + img_x = (img_x - x0) / (x1 - x0) * 2 - 1 + # img_x, img_y have shapes (N, w), (N, h) + + gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) + gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) + grid = torch.stack([gx, gy], dim=3) + + if not torch.jit.is_scripting(): + if not masks.dtype.is_floating_point: + masks = masks.float() + img_masks = F.grid_sample(masks, grid.to(masks.dtype), align_corners=False) + + if skip_empty and not torch.jit.is_scripting(): + return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) + else: + return img_masks[:, 0], () + + +def paste_masks_in_image( + masks: torch.Tensor, boxes: Boxes, image_shape: Tuple[int, int], threshold: float = 0.5 +): + """ + Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image. + The location, height, and width for pasting each mask is determined by their + corresponding bounding boxes in boxes. + + Note: + This is a complicated but more accurate implementation. In actual deployment, it is + often enough to use a faster but less accurate implementation. + See :func:`paste_mask_in_image_old` in this file for an alternative implementation. + + Args: + masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of + detected object instances in the image and Hmask, Wmask are the mask width and mask + height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1]. + boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4). + boxes[i] and masks[i] correspond to the same object instance. + image_shape (tuple): height, width + threshold (float): A threshold in [0, 1] for converting the (soft) masks to + binary masks. + + Returns: + img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the + number of detected object instances and Himage, Wimage are the image width + and height. img_masks[i] is a binary mask for object instance i. + """ + + assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported" + N = len(masks) + if N == 0: + return masks.new_empty((0,) + image_shape, dtype=torch.uint8) + if not isinstance(boxes, torch.Tensor): + boxes = boxes.tensor + device = boxes.device + assert len(boxes) == N, boxes.shape + + img_h, img_w = image_shape + + # The actual implementation split the input into chunks, + # and paste them chunk by chunk. + if device.type == "cpu" or torch.jit.is_scripting(): + # CPU is most efficient when they are pasted one by one with skip_empty=True + # so that it performs minimal number of operations. 
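The chunking logic right below keeps each pasted chunk under `GPU_MEM_LIMIT` bytes of fp32 output; a quick sanity check of the arithmetic:

```python
# Sanity check of the GPU chunking arithmetic in paste_masks_in_image (below):
# num_chunks grows once N * H * W * 4 bytes would exceed the 1 GiB limit.
import math

BYTES_PER_FLOAT = 4
GPU_MEM_LIMIT = 1024 ** 3                     # 1 GiB

def num_chunks(n, img_h, img_w):
    return int(math.ceil(n * img_h * img_w * BYTES_PER_FLOAT / GPU_MEM_LIMIT))

print(num_chunks(100, 1024, 2048))   # 1  (~800 MiB fits in a single chunk)
print(num_chunks(300, 1024, 2048))   # 3  (~2.3 GiB is split into three chunks)
```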
+ num_chunks = N + else: + # GPU benefits from parallelism for larger chunks, but may have memory issue + # int(img_h) because shape may be tensors in tracing + num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) + assert ( + num_chunks <= N + ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it" + chunks = torch.chunk(torch.arange(N, device=device), num_chunks) + + img_masks = torch.zeros( + N, img_h, img_w, device=device, dtype=torch.bool if threshold >= 0 else torch.uint8 + ) + for inds in chunks: + masks_chunk, spatial_inds = _do_paste_mask( + masks[inds, None, :, :], boxes[inds], img_h, img_w, skip_empty=device.type == "cpu" + ) + + if threshold >= 0: + masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) + else: + # for visualization and debugging + masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) + + if torch.jit.is_scripting(): # Scripting does not use the optimized codepath + img_masks[inds] = masks_chunk + else: + img_masks[(inds,) + spatial_inds] = masks_chunk + return img_masks + + +# The below are the original paste function (from Detectron1) which has +# larger quantization error. +# It is faster on CPU, while the aligned one is faster on GPU thanks to grid_sample. + + +def paste_mask_in_image_old(mask, box, img_h, img_w, threshold): + """ + Paste a single mask in an image. + This is a per-box implementation of :func:`paste_masks_in_image`. + This function has larger quantization error due to incorrect pixel + modeling and is not used any more. + + Args: + mask (Tensor): A tensor of shape (Hmask, Wmask) storing the mask of a single + object instance. Values are in [0, 1]. + box (Tensor): A tensor of shape (4, ) storing the x0, y0, x1, y1 box corners + of the object instance. + img_h, img_w (int): Image height and width. + threshold (float): Mask binarization threshold in [0, 1]. + + Returns: + im_mask (Tensor): + The resized and binarized object mask pasted into the original + image plane (a tensor of shape (img_h, img_w)). + """ + # Conversion from continuous box coordinates to discrete pixel coordinates + # via truncation (cast to int32). This determines which pixels to paste the + # mask onto. + box = box.to(dtype=torch.int32) # Continuous to discrete coordinate conversion + # An example (1D) box with continuous coordinates (x0=0.7, x1=4.3) will map to + # a discrete coordinates (x0=0, x1=4). Note that box is mapped to 5 = x1 - x0 + 1 + # pixels (not x1 - x0 pixels). 
+ samples_w = box[2] - box[0] + 1 # Number of pixel samples, *not* geometric width + samples_h = box[3] - box[1] + 1 # Number of pixel samples, *not* geometric height + + # Resample the mask from it's original grid to the new samples_w x samples_h grid + mask = Image.fromarray(mask.cpu().numpy()) + mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) + mask = np.array(mask, copy=False) + + if threshold >= 0: + mask = np.array(mask > threshold, dtype=np.uint8) + mask = torch.from_numpy(mask) + else: + # for visualization and debugging, we also + # allow it to return an unmodified mask + mask = torch.from_numpy(mask * 255).to(torch.uint8) + + im_mask = torch.zeros((img_h, img_w), dtype=torch.uint8) + x_0 = max(box[0], 0) + x_1 = min(box[2] + 1, img_w) + y_0 = max(box[1], 0) + y_1 = min(box[3] + 1, img_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0]) + ] + return im_mask + + +# Our pixel modeling requires extrapolation for any continuous +# coordinate < 0.5 or > length - 0.5. When sampling pixels on the masks, +# we would like this extrapolation to be an interpolation between boundary values and zero, +# instead of using absolute zero or boundary values. +# Therefore `paste_mask_in_image_old` is often used with zero padding around the masks like this: +# masks, scale = pad_masks(masks[:, 0, :, :], 1) +# boxes = scale_boxes(boxes.tensor, scale) + + +def pad_masks(masks, padding): + """ + Args: + masks (tensor): A tensor of shape (B, M, M) representing B masks. + padding (int): Number of cells to pad on all sides. + + Returns: + The padded masks and the scale factor of the padding size / original size. + """ + B = masks.shape[0] + M = masks.shape[-1] + pad2 = 2 * padding + scale = float(M + pad2) / M + padded_masks = masks.new_zeros((B, M + pad2, M + pad2)) + padded_masks[:, padding:-padding, padding:-padding] = masks + return padded_masks, scale + + +def scale_boxes(boxes, scale): + """ + Args: + boxes (tensor): A tensor of shape (B, 4) representing B boxes with 4 + coords representing the corners x0, y0, x1, y1, + scale (float): The box scaling factor. + + Returns: + Scaled boxes. + """ + w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 + h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 + x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5 + y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5 + + w_half *= scale + h_half *= scale + + scaled_boxes = torch.zeros_like(boxes) + scaled_boxes[:, 0] = x_c - w_half + scaled_boxes[:, 2] = x_c + w_half + scaled_boxes[:, 1] = y_c - h_half + scaled_boxes[:, 3] = y_c + h_half + return scaled_boxes diff --git a/src/sts/detectron2/layers/nms.py b/src/sts/detectron2/layers/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..ac14d459259b19a1a145adff2817a0ca0441b7eb --- /dev/null +++ b/src/sts/detectron2/layers/nms.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +from typing import List +import torch +from torchvision.ops import boxes as box_ops +from torchvision.ops import nms # BC-compat + +from detectron2.utils.env import TORCH_VERSION + +if TORCH_VERSION < (1, 7): + from detectron2 import _C + + nms_rotated_func = _C.nms_rotated +else: + nms_rotated_func = torch.ops.detectron2.nms_rotated + + +def batched_nms( + boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float +): + """ + Same as torchvision.ops.boxes.batched_nms, but safer. + """ + assert boxes.shape[-1] == 4 + # TODO may need better strategy. 
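Both the `nms` imported from torchvision in this file and the rotated CPU/CUDA kernels earlier in the diff implement the same greedy suppression scheme. For intuition, a plain NumPy restatement follows; `pairwise_iou` is a hypothetical callback (axis-aligned or rotated IoU, depending on the box representation), not a function from this repository.

```python
# Greedy NMS sketch: keep the best-scoring box, suppress everything that overlaps
# it beyond the threshold, then repeat with the next unsuppressed box.
import numpy as np

def greedy_nms(boxes, scores, iou_threshold, pairwise_iou):
    order = np.argsort(-scores)                    # highest score first
    suppressed = np.zeros(len(boxes), dtype=bool)
    keep = []
    for rank, i in enumerate(order):
        if suppressed[i]:
            continue
        keep.append(i)
        for j in order[rank + 1:]:
            if not suppressed[j] and pairwise_iou(boxes[i], boxes[j]) >= iou_threshold:
                suppressed[j] = True
    return np.asarray(keep, dtype=np.int64)
```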
+ # Investigate after having a fully-cuda NMS op. + if len(boxes) < 40000: + # fp16 does not have enough range for batched NMS + return box_ops.batched_nms(boxes.float(), scores, idxs, iou_threshold) + + result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()): + mask = (idxs == id).nonzero().view(-1) + keep = nms(boxes[mask], scores[mask], iou_threshold) + result_mask[mask[keep]] = True + keep = result_mask.nonzero().view(-1) + keep = keep[scores[keep].argsort(descending=True)] + return keep + + +# Note: this function (nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future +def nms_rotated(boxes, scores, iou_threshold): + """ + Performs non-maximum suppression (NMS) on the rotated boxes according + to their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as + RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they + can be representing completely different objects in certain tasks, e.g., OCR. + + As for the question of whether rotated-NMS should treat them as faraway boxes + even though their IOU is 1, it depends on the application and/or ground truth annotation. + + As an extreme example, consider a single character v and the square box around it. + + If the angle is 0 degree, the object (text) would be read as 'v'; + + If the angle is 90 degrees, the object (text) would become '>'; + + If the angle is 180 degrees, the object (text) would become '^'; + + If the angle is 270/-90 degrees, the object (text) would become '<' + + All of these cases have IoU of 1 to each other, and rotated NMS that only + uses IoU as criterion would only keep one of them with the highest score - + which, practically, still makes sense in most cases because typically + only one of theses orientations is the correct one. Also, it does not matter + as much if the box is only used to classify the object (instead of transcribing + them with a sequential OCR recognition model) later. + + On the other hand, when we use IoU to filter proposals that are close to the + ground truth during training, we should definitely take the angle into account if + we know the ground truth is labeled with the strictly correct orientation (as in, + upside-down words are annotated with -180 degrees even though they can be covered + with a 0/90/-90 degree box, etc.) + + The way the original dataset is annotated also matters. For example, if the dataset + is a 4-point polygon dataset that does not enforce ordering of vertices/orientation, + we can estimate a minimum rotated bounding box to this polygon, but there's no way + we can tell the correct angle with 100% confidence (as shown above, there could be 4 different + rotated boxes, with angles differed by 90 degrees to each other, covering the exactly + same region). In that case we have to just use IoU to determine the box + proximity (as many detection benchmarks (even for text) do) unless there're other + assumptions we can make (like width is always larger than height, or the object is not + rotated by more than 90 degrees CCW/CW, etc.) + + In summary, not considering angles in rotated NMS seems to be a good option for now, + but we should be aware of its implications. + + Args: + boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. 
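The angle ambiguity discussed above is easy to reproduce: the two boxes from the docstring cover exactly the same region, so rotated NMS keeps only the higher-scoring one. The snippet assumes the `detectron2._C` extension from this diff has been built.

```python
# (5, 3, 4, 2, -90) and (5, 3, 4, 2, 90) cover the same region, so their IoU is 1
# and only the higher-scoring box survives rotated NMS.
import torch
from detectron2.layers.nms import nms_rotated

boxes = torch.tensor([[5.0, 3.0, 4.0, 2.0, -90.0],
                      [5.0, 3.0, 4.0, 2.0,  90.0]])
scores = torch.tensor([0.9, 0.8])
print(nms_rotated(boxes, scores, 0.5))   # tensor([0]): the second box is suppressed
```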
They are expected to be in + (x_center, y_center, width, height, angle_degrees) format. + scores (Tensor[N]): Scores for each one of the rotated boxes + iou_threshold (float): Discards all overlapping rotated boxes with IoU < iou_threshold + + Returns: + keep (Tensor): int64 tensor with the indices of the elements that have been kept + by Rotated NMS, sorted in decreasing order of scores + """ + return nms_rotated_func(boxes, scores, iou_threshold) + + +# Note: this function (batched_nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future +def batched_nms_rotated(boxes, scores, idxs, iou_threshold): + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 5]): + boxes where NMS will be performed. They + are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format + scores (Tensor[N]): + scores for each one of the boxes + idxs (Tensor[N]): + indices of the categories for each one of the boxes. + iou_threshold (float): + discards all overlapping boxes + with IoU < iou_threshold + + Returns: + Tensor: + int64 tensor with the indices of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + assert boxes.shape[-1] == 5 + + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + boxes = boxes.float() # fp16 does not have enough range for batched NMS + # Strategy: in order to perform NMS independently per class, + # we add an offset to all the boxes. The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + + # Note that batched_nms in torchvision/ops/boxes.py only uses max_coordinate, + # which won't handle negative coordinates correctly. + # Here by using min_coordinate we can make sure the negative coordinates are + # correctly handled. + max_coordinate = ( + torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).max() + min_coordinate = ( + torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).min() + offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1) + boxes_for_nms = boxes.clone() # avoid modifying the original values in boxes + boxes_for_nms[:, :2] += offsets[:, None] + keep = nms_rotated(boxes_for_nms, scores, iou_threshold) + return keep diff --git a/src/sts/detectron2/layers/roi_align.py b/src/sts/detectron2/layers/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..bcbf5f4c7025c905603f95dce7bb5c42d5379987 --- /dev/null +++ b/src/sts/detectron2/layers/roi_align.py @@ -0,0 +1,72 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from torch import nn +from torchvision.ops import roi_align + + +# NOTE: torchvision's RoIAlign has a different default aligned=False +class ROIAlign(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + aligned (bool): if False, use the legacy implementation in + Detectron. If True, align the results more perfectly. 
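Stepping back to `batched_nms_rotated` above: the per-class center offset makes a single class-agnostic NMS call behave as if it were run separately per category, so identical boxes with different class ids both survive. The snippet assumes the compiled `detectron2._C` extension.

```python
# Two identical rotated boxes with different category ids: plain nms_rotated keeps
# one, batched_nms_rotated keeps both thanks to the per-class center offset.
import torch
from detectron2.layers.nms import batched_nms_rotated, nms_rotated

boxes = torch.tensor([[10.0, 10.0, 8.0, 4.0, 30.0],
                      [10.0, 10.0, 8.0, 4.0, 30.0]])
scores = torch.tensor([0.9, 0.8])
idxs = torch.tensor([0, 1])                          # different categories

print(nms_rotated(boxes, scores, 0.5))               # tensor([0])
print(batched_nms_rotated(boxes, scores, idxs, 0.5)) # both indices are kept
```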
+ + Note: + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). But the original + roi_align (aligned=False) does not subtract the 0.5 when computing neighboring + pixel indices and therefore it uses pixels with a slightly incorrect alignment + (relative to our pixel model) when performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; see + detectron2/tests/test_roi_align.py for verification. + + The difference does not make a difference to the model's performance if + ROIAlign is used together with conv layers. + """ + super().__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.aligned = aligned + + from torchvision import __version__ + + version = tuple(int(x) for x in __version__.split(".")[:2]) + # https://github.com/pytorch/vision/pull/2438 + assert version >= (0, 7), "Require torchvision >= 0.7" + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. + """ + assert rois.dim() == 2 and rois.size(1) == 5 + return roi_align( + input, + rois.to(dtype=input.dtype), + self.output_size, + self.spatial_scale, + self.sampling_ratio, + self.aligned, + ) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ", aligned=" + str(self.aligned) + tmpstr += ")" + return tmpstr diff --git a/src/sts/detectron2/layers/roi_align_rotated.py b/src/sts/detectron2/layers/roi_align_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..e3775e08fc9b9172f73c8ec7025a51ef2edd0a1d --- /dev/null +++ b/src/sts/detectron2/layers/roi_align_rotated.py @@ -0,0 +1,93 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
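A minimal `ROIAlign` usage sketch following the `rois` layout documented above: each row is `(batch_index, x0, y0, x1, y1)` in input-image coordinates. It only needs torchvision >= 0.7; the tensor shapes here are illustrative.

```python
import torch
from detectron2.layers.roi_align import ROIAlign

pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=0, aligned=True)
feat = torch.randn(2, 256, 50, 50)                    # e.g. a stride-16 feature map
rois = torch.tensor([[0.0,  32.0,  32.0, 256.0, 200.0],
                     [1.0, 100.0,  64.0, 400.0, 300.0]])
print(pooler(feat, rois).shape)                       # torch.Size([2, 256, 7, 7])
```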
+import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from detectron2 import _C + + +class _ROIAlignRotated(Function): + @staticmethod + def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): + ctx.save_for_backward(roi) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + ctx.input_shape = input.size() + output = _C.roi_align_rotated_forward( + input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + (rois,) = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + sampling_ratio = ctx.sampling_ratio + bs, ch, h, w = ctx.input_shape + grad_input = _C.roi_align_rotated_backward( + grad_output, + rois, + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + sampling_ratio, + ) + return grad_input, None, None, None, None, None + + +roi_align_rotated = _ROIAlignRotated.apply + + +class ROIAlignRotated(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + + Note: + ROIAlignRotated supports continuous coordinate by default: + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). + """ + super(ROIAlignRotated, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx6 boxes. First column is the index into N. + The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). + """ + assert rois.dim() == 2 and rois.size(1) == 6 + orig_dtype = input.dtype + if orig_dtype == torch.float16: + input = input.float() + rois = rois.float() + return roi_align_rotated( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio + ).to(dtype=orig_dtype) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ")" + return tmpstr diff --git a/src/sts/detectron2/layers/rotated_boxes.py b/src/sts/detectron2/layers/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..0004f765ef355ad47d92d26d3012be382e1b3eca --- /dev/null +++ b/src/sts/detectron2/layers/rotated_boxes.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import absolute_import, division, print_function, unicode_literals + +from detectron2 import _C + + +def pairwise_iou_rotated(boxes1, boxes2): + """ + Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. 
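`pairwise_iou_rotated` returns the full `(N, M)` IoU matrix, which makes the angle behaviour easy to see: flipping the same 4x2 footprint by ±90 degrees still gives IoU 1, while the unrotated box only partially overlaps it. Assumes the compiled `detectron2._C` extension; the printed values follow from the box geometry, approximately.

```python
import torch
from detectron2.layers.rotated_boxes import pairwise_iou_rotated

boxes1 = torch.tensor([[5.0, 3.0, 4.0, 2.0, -90.0]])
boxes2 = torch.tensor([[5.0, 3.0, 4.0, 2.0,  90.0],
                       [5.0, 3.0, 4.0, 2.0,   0.0]])
print(pairwise_iou_rotated(boxes1, boxes2))   # shape (1, 2), roughly [[1.0000, 0.3333]]
```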
+ + Arguments: + boxes1 (Tensor[N, 5]) + boxes2 (Tensor[M, 5]) + + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + return _C.box_iou_rotated(boxes1, boxes2) diff --git a/src/sts/detectron2/layers/shape_spec.py b/src/sts/detectron2/layers/shape_spec.py new file mode 100644 index 0000000000000000000000000000000000000000..fe7e8e261c1ab1bb1636bd7a245068d64e632306 --- /dev/null +++ b/src/sts/detectron2/layers/shape_spec.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. +from collections import namedtuple + + +class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): + """ + A simple structure that contains basic shape specification about a tensor. + It is often used as the auxiliary inputs/outputs of models, + to complement the lack of shape inference ability among pytorch modules. + + Attributes: + channels: + height: + width: + stride: + """ + + def __new__(cls, channels=None, height=None, width=None, stride=None): + return super().__new__(cls, channels, height, width, stride) diff --git a/src/sts/detectron2/layers/wrappers.py b/src/sts/detectron2/layers/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..5bb4e7c1a1334c5501a6c492ddfa836dadf0beab --- /dev/null +++ b/src/sts/detectron2/layers/wrappers.py @@ -0,0 +1,110 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Wrappers around on some nn functions, mainly to support empty tensors. + +Ideally, add support directly in PyTorch to empty tensors in those functions. + +These can be removed once https://github.com/pytorch/pytorch/issues/12013 +is implemented +""" + +from typing import List +import torch +from torch.nn import functional as F + + +def cat(tensors: List[torch.Tensor], dim: int = 0): + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def cross_entropy(input, target, *, reduction="mean", **kwargs): + """ + Same as `torch.nn.functional.cross_entropy`, but returns 0 (instead of nan) + for empty inputs. + """ + if target.numel() == 0 and reduction == "mean": + return input.sum() * 0.0 # connect the gradient + return F.cross_entropy(input, target, **kwargs) + + +class _NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return _NewEmptyTensorOp.apply(grad, shape), None + + +class Conv2d(torch.nn.Conv2d): + """ + A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. + """ + + def __init__(self, *args, **kwargs): + """ + Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: + + Args: + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + + It assumes that norm layer is used before activation. + """ + norm = kwargs.pop("norm", None) + activation = kwargs.pop("activation", None) + super().__init__(*args, **kwargs) + + self.norm = norm + self.activation = activation + + def forward(self, x): + # torchscript does not support SyncBatchNorm yet + # https://github.com/pytorch/pytorch/issues/40507 + # and we skip these codes in torchscript since: + # 1. 
currently we only support torchscript in evaluation mode + # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or + # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. + if not torch.jit.is_scripting(): + if x.numel() == 0 and self.training: + # https://github.com/pytorch/pytorch/issues/12013 + assert not isinstance( + self.norm, torch.nn.SyncBatchNorm + ), "SyncBatchNorm does not support empty inputs!" + + x = F.conv2d( + x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + +ConvTranspose2d = torch.nn.ConvTranspose2d +BatchNorm2d = torch.nn.BatchNorm2d +interpolate = F.interpolate +Linear = torch.nn.Linear + + +def nonzero_tuple(x): + """ + A 'as_tuple=True' version of torch.nonzero to support torchscript. + because of https://github.com/pytorch/pytorch/issues/38718 + """ + if torch.jit.is_scripting(): + if x.dim() == 0: + return x.unsqueeze(0).nonzero().unbind(1) + return x.nonzero().unbind(1) + else: + return x.nonzero(as_tuple=True) diff --git a/src/sts/detectron2/model_zoo/__init__.py b/src/sts/detectron2/model_zoo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fcae6e18502bab72d76e220b7144b8c262d80e1f --- /dev/null +++ b/src/sts/detectron2/model_zoo/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Model Zoo API for Detectron2: a collection of functions to create common model architectures +listed in `MODEL_ZOO.md `_, +and optionally load their pre-trained weights. +""" + +from .model_zoo import get, get_config_file, get_checkpoint_url, get_config + +__all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"] diff --git a/src/sts/detectron2/model_zoo/model_zoo.py b/src/sts/detectron2/model_zoo/model_zoo.py new file mode 100644 index 0000000000000000000000000000000000000000..8fa9a0cbdee685d8c9d70ea8a4e4a63fa3c3c7a7 --- /dev/null +++ b/src/sts/detectron2/model_zoo/model_zoo.py @@ -0,0 +1,172 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import os +from typing import Optional +import pkg_resources +import torch + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.modeling import build_model + + +class _ModelZooUrls(object): + """ + Mapping from names to officially released Detectron2 pre-trained models. 
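Back in `wrappers.py`, the `Conv2d` subclass above accepts `norm` and `activation` as extra keyword arguments and applies them after the convolution; a small, self-contained usage sketch:

```python
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.layers.wrappers import Conv2d

conv = Conv2d(3, 16, kernel_size=3, padding=1,
              norm=nn.BatchNorm2d(16), activation=F.relu)
print(conv(torch.randn(2, 3, 32, 32)).shape)   # torch.Size([2, 16, 32, 32])
```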
+ """ + + S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + # format: {config_path.yaml} -> model_id/model_final_{commit}.pkl + CONFIG_PATH_TO_URL_SUFFIX = { + # COCO Detection with Faster R-CNN + "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml": "137257644/model_final_721ade.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml": "137847829/model_final_51d356.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml": "137257794/model_final_b275ba.pkl", + "COCO-Detection/faster_rcnn_R_50_C4_3x.yaml": "137849393/model_final_f97cb7.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml": "137849425/model_final_68d202.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml": "137849458/model_final_280758.pkl", + "COCO-Detection/faster_rcnn_R_101_C4_3x.yaml": "138204752/model_final_298dad.pkl", + "COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml": "138204841/model_final_3e0943.pkl", + "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml": "137851257/model_final_f6e8b1.pkl", + "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml": "139173657/model_final_68b088.pkl", + # COCO Detection with RetinaNet + "COCO-Detection/retinanet_R_50_FPN_1x.yaml": "190397773/model_final_bfca0b.pkl", + "COCO-Detection/retinanet_R_50_FPN_3x.yaml": "190397829/model_final_5bd44e.pkl", + "COCO-Detection/retinanet_R_101_FPN_3x.yaml": "190397697/model_final_971ab9.pkl", + # COCO Detection with RPN and Fast R-CNN + "COCO-Detection/rpn_R_50_C4_1x.yaml": "137258005/model_final_450694.pkl", + "COCO-Detection/rpn_R_50_FPN_1x.yaml": "137258492/model_final_02ce48.pkl", + "COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml": "137635226/model_final_e5f7ce.pkl", + # COCO Instance Segmentation Baselines with Mask R-CNN + "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml": "137259246/model_final_9243eb.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml": "137260150/model_final_4f86c3.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "137260431/model_final_a54504.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml": "137849525/model_final_4ce675.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml": "137849551/model_final_84107b.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml": "137849600/model_final_f10217.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml": "138363239/model_final_a2914c.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml": "138363294/model_final_0464b7.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml": "138205316/model_final_a3ec72.pkl", + "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml": "139653917/model_final_2d9806.pkl", # noqa + # COCO Person Keypoint Detection Baselines with Keypoint R-CNN + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml": "137261548/model_final_04e291.pkl", + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml": "137849621/model_final_a6e10b.pkl", + "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml": "138363331/model_final_997cc7.pkl", + "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml": "139686956/model_final_5ad38f.pkl", + # COCO Panoptic Segmentation Baselines with Panoptic FPN + "COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml": "139514544/model_final_dbfeb4.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml": "139514569/model_final_c10459.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml": "139514519/model_final_cafdb1.pkl", + # LVIS Instance Segmentation Baselines with Mask R-CNN + "LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "144219072/model_final_571f7c.pkl", # 
noqa + "LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml": "144219035/model_final_824ab5.pkl", # noqa + "LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml": "144219108/model_final_5e3439.pkl", # noqa + # Cityscapes & Pascal VOC Baselines + "Cityscapes/mask_rcnn_R_50_FPN.yaml": "142423278/model_final_af9cf5.pkl", + "PascalVOC-Detection/faster_rcnn_R_50_C4.yaml": "142202221/model_final_b1acc2.pkl", + # Other Settings + "Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml": "138602867/model_final_65c703.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml": "144998336/model_final_821d0b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml": "138602847/model_final_e9d89b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml": "144998488/model_final_480dd8.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml": "169527823/model_final_3b3c51.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_gn.yaml": "138602888/model_final_dc5d9e.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml": "138602908/model_final_01ca85.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml": "183808979/model_final_da7b4c.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml": "184226666/model_final_5ce33e.pkl", + "Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml": "139797668/model_final_be35db.pkl", + "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml": "18131413/model_0039999_e76410.pkl", # noqa + # D1 Comparisons + "Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml": "137781054/model_final_7ab50c.pkl", # noqa + "Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml": "137781281/model_final_62ca52.pkl", # noqa + "Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml": "137781195/model_final_cce136.pkl", + } + + +def get_checkpoint_url(config_path): + """ + Returns the URL to the model trained using the given config + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: a URL to the model + """ + name = config_path.replace(".yaml", "") + if config_path in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX: + suffix = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[config_path] + return _ModelZooUrls.S3_PREFIX + name + "/" + suffix + raise RuntimeError("{} not available in Model Zoo!".format(name)) + + +def get_config_file(config_path): + """ + Returns path to a builtin config file. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: the real path to the config file. + """ + cfg_file = pkg_resources.resource_filename( + "detectron2.model_zoo", os.path.join("configs", config_path) + ) + if not os.path.exists(cfg_file): + raise RuntimeError("{} not available in Model Zoo!".format(config_path)) + return cfg_file + + +def get_config(config_path, trained: bool = False): + """ + Returns a config object for a model in model zoo. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + trained (bool): If True, will set ``MODEL.WEIGHTS`` to trained model zoo weights. + If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used + instead; this will typically (though not always) initialize a subset of weights using + an ImageNet pre-trained model, while randomly initializing the other weights. 
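The lookup helpers above are typically used together: resolve the packaged config file and the matching pre-trained weight URL for one of the table entries. The config path below is taken from the table; this assumes detectron2 is installed with its bundled `configs/` directory.

```python
from detectron2 import model_zoo

cfg_path = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
print(model_zoo.get_config_file(cfg_path))      # absolute path to the packaged .yaml
print(model_zoo.get_checkpoint_url(cfg_path))   # https://dl.fbaipublicfiles.com/detectron2/...
```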
+ + Returns: + CfgNode: a config object + """ + cfg_file = get_config_file(config_path) + cfg = get_cfg() + cfg.merge_from_file(cfg_file) + if trained: + cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path) + return cfg + + +def get(config_path, trained: bool = False, device: Optional[str] = None): + """ + Get a model specified by relative path under Detectron2's official ``configs/`` directory. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + trained (bool): see :func:`get_config`. + device (str or None): overwrite the device in config, if given. + + Returns: + nn.Module: a detectron2 model. Will be in training mode. + + Example: + :: + from detectron2 import model_zoo + model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True) + """ + cfg = get_config(config_path, trained) + if device is not None: + cfg.MODEL.DEVICE = device + elif not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + return model diff --git a/src/sts/detectron2/modeling/__init__.py b/src/sts/detectron2/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0655f96b4618d716f62290ce65e7ae82335ea61f --- /dev/null +++ b/src/sts/detectron2/modeling/__init__.py @@ -0,0 +1,58 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from detectron2.layers import ShapeSpec + +from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY +from .backbone import ( + BACKBONE_REGISTRY, + FPN, + Backbone, + ResNet, + ResNetBlockBase, + build_backbone, + build_resnet_backbone, + make_stage, +) +from .meta_arch import ( + META_ARCH_REGISTRY, + SEM_SEG_HEADS_REGISTRY, + GeneralizedRCNN, + PanopticFPN, + ProposalNetwork, + RetinaNet, + SemanticSegmentor, + build_model, + build_sem_seg_head, +) +from .postprocessing import detector_postprocess +from .proposal_generator import ( + PROPOSAL_GENERATOR_REGISTRY, + build_proposal_generator, + RPN_HEAD_REGISTRY, + build_rpn_head, +) +from .roi_heads import ( + ROI_BOX_HEAD_REGISTRY, + ROI_HEADS_REGISTRY, + ROI_KEYPOINT_HEAD_REGISTRY, + ROI_MASK_HEAD_REGISTRY, + ROIHeads, + StandardROIHeads, + BaseMaskRCNNHead, + BaseKeypointRCNNHead, + FastRCNNOutputLayers, + build_box_head, + build_keypoint_head, + build_mask_head, + build_roi_heads, +) +from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA +from .mmdet_wrapper import MMDetBackbone, MMDetDetector + +_EXCLUDE = {"ShapeSpec"} +__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/detectron2/modeling/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..28db13116f37cb133b539d942986effa48d72e81 Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/anchor_generator.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/anchor_generator.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a54a872c1919d32a2e263cc2b661eb6ef5576b8 Binary files /dev/null and 
b/src/sts/detectron2/modeling/__pycache__/anchor_generator.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/box_regression.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/box_regression.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3bab5213434d97c67b916d584edab762a39d76f0 Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/box_regression.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/matcher.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06910194c31d36a4534e3b57b0a333d62a1c19b7 Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/matcher.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/mmdet_wrapper.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/mmdet_wrapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aaaeaac120c4df60ad7d99bce049457ed7bf7760 Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/mmdet_wrapper.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/poolers.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/poolers.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab0b09addcdcdfea5be4bedaf09839f92f0eca01 Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/poolers.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/postprocessing.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/postprocessing.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82c3ff98242f8695c2067199ccbc297d3e37cce1 Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/postprocessing.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/sampling.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/sampling.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71ca1d1cf7a244fcd3aa007f666406bc16045eea Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/sampling.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/__pycache__/test_time_augmentation.cpython-38.pyc b/src/sts/detectron2/modeling/__pycache__/test_time_augmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..939964d8a17e1ec071c0f72157ca0d88a30949b5 Binary files /dev/null and b/src/sts/detectron2/modeling/__pycache__/test_time_augmentation.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/anchor_generator.py b/src/sts/detectron2/modeling/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..a8343e0ad573414c9123a75e2f51ec4487ed93d0 --- /dev/null +++ b/src/sts/detectron2/modeling/anchor_generator.py @@ -0,0 +1,381 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import collections +import math +from typing import List +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec +from detectron2.structures import Boxes, RotatedBoxes +from detectron2.utils.registry import Registry + +ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR") +ANCHOR_GENERATOR_REGISTRY.__doc__ = """ +Registry for modules that creates object detection anchors for feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. 
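+
+A minimal sketch of typical use (mirroring ``build_anchor_generator`` at the end of
+this file; ``cfg`` and ``input_shape`` stand for a config node and a list of
+ShapeSpec)::
+
+    name = cfg.MODEL.ANCHOR_GENERATOR.NAME          # e.g. "DefaultAnchorGenerator"
+    anchor_generator = ANCHOR_GENERATOR_REGISTRY.get(name)(cfg, input_shape)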
+""" + + +class BufferList(nn.Module): + """ + Similar to nn.ParameterList, but for buffers + """ + + def __init__(self, buffers): + super().__init__() + for i, buffer in enumerate(buffers): + self.register_buffer(str(i), buffer) + + def __len__(self): + return len(self._buffers) + + def __iter__(self): + return iter(self._buffers.values()) + + +def _create_grid_offsets(size: List[int], stride: int, offset: float, device: torch.device): + grid_height, grid_width = size + shifts_x = torch.arange( + offset * stride, grid_width * stride, step=stride, dtype=torch.float32, device=device + ) + shifts_y = torch.arange( + offset * stride, grid_height * stride, step=stride, dtype=torch.float32, device=device + ) + + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + return shift_x, shift_y + + +def _broadcast_params(params, num_features, name): + """ + If one size (or aspect ratio) is specified and there are multiple feature + maps, we "broadcast" anchors of that single size (or aspect ratio) + over all feature maps. + + If params is list[float], or list[list[float]] with len(params) == 1, repeat + it num_features time. + + Returns: + list[list[float]]: param for each feature + """ + assert isinstance( + params, collections.abc.Sequence + ), f"{name} in anchor generator has to be a list! Got {params}." + assert len(params), f"{name} in anchor generator cannot be empty!" + if not isinstance(params[0], collections.abc.Sequence): # params is list[float] + return [params] * num_features + if len(params) == 1: + return list(params) * num_features + assert len(params) == num_features, ( + f"Got {name} of length {len(params)} in anchor generator, " + f"but the number of input features is {num_features}!" + ) + return params + + +@ANCHOR_GENERATOR_REGISTRY.register() +class DefaultAnchorGenerator(nn.Module): + """ + Compute anchors in the standard ways described in + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks". + """ + + box_dim: torch.jit.Final[int] = 4 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If ``sizes`` is list[float], ``sizes`` is used for all feature maps. + Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. 
+ """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + } + + def _calculate_anchors(self, sizes, aspect_ratios): + cell_anchors = [ + self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios) + ] + return BufferList(cell_anchors) + + @property + @torch.jit.unused + def num_cell_anchors(self): + """ + Alias of `num_anchors`. + """ + return self.num_anchors + + @property + @torch.jit.unused + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios and 5 sizes, the number of anchors is 15. + (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config) + + In standard RPN models, `num_anchors` on every feature map is the same. + """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes: List[List[int]]): + """ + Returns: + list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4 + """ + anchors = [] + # buffers() not supported by torchscript. use named_buffers() instead + buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()] + for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) + + return anchors + + def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes and aspect_ratios centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes + in XYXY format. + """ + + # This is different from the anchor generator defined in the original Faster R-CNN + # code or Detectron. They yield the same AP, however the old version defines cell + # anchors in a less natural way with a shift relative to the feature grid and + # quantization that results in slightly different sizes for different aspect ratios. + # See also https://github.com/facebookresearch/Detectron/issues/227 + + anchors = [] + for size in sizes: + area = size ** 2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... 
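+                # i.e. substitute h = a * w into w * h = s * s to get a * w^2 = s^2,
+                # then solve for w (illustrative check: s = 64, a = 0.5 gives
+                # w = sqrt(4096 / 0.5) ~ 90.5, h ~ 45.3, and w * h ~ 4096):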
+ # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 + anchors.append([x0, y0, x1, y1]) + return torch.tensor(anchors) + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[Boxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [Boxes(x) for x in anchors_over_all_feature_maps] + + +@ANCHOR_GENERATOR_REGISTRY.register() +class RotatedAnchorGenerator(nn.Module): + """ + Compute rotated anchors used by Rotated RPN (RRPN), described in + "Arbitrary-Oriented Scene Text Detection via Rotation Proposals". + """ + + box_dim: int = 5 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If sizes is list[list[float]], sizes[i] is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If sizes is list[float], the sizes are used for all feature maps. + Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + angles (list[list[float]] or list[float]): list of angles (in degrees CCW) + to use for anchors. Same "broadcast" rule for `sizes` applies. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. + """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + angles = _broadcast_params(angles, self.num_features, "angles") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES, + } + + def _calculate_anchors(self, sizes, aspect_ratios, angles): + cell_anchors = [ + self.generate_cell_anchors(size, aspect_ratio, angle).float() + for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles) + ] + return BufferList(cell_anchors) + + @property + def num_cell_anchors(self): + """ + Alias of `num_anchors`. 
+ """ + return self.num_anchors + + @property + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios, 2 sizes and 5 angles, the number of anchors is 30. + (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS + and ANCHOR_GENERATOR.ANGLES in config) + + In standard RRPN models, `num_anchors` on every feature map is the same. + """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes): + anchors = [] + for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) + zeros = torch.zeros_like(shift_x) + shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1) + + anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5)) + + return anchors + + def generate_cell_anchors( + self, + sizes=(32, 64, 128, 256, 512), + aspect_ratios=(0.5, 1, 2), + angles=(-90, -60, -30, 0, 30, 60, 90), + ): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes, aspect_ratios, angles centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + angles (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) + storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format. + """ + anchors = [] + for size in sizes: + area = size ** 2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... + # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + anchors.extend([0, 0, w, h, a] for a in angles) + + return torch.tensor(anchors) + + def forward(self, features): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [RotatedBoxes(x) for x in anchors_over_all_feature_maps] + + +def build_anchor_generator(cfg, input_shape): + """ + Built an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`. + """ + anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME + return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape) diff --git a/src/sts/detectron2/modeling/backbone/__init__.py b/src/sts/detectron2/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a34cbc5976db8e0b7d62b9d70a83a34c187c388a --- /dev/null +++ b/src/sts/detectron2/modeling/backbone/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip + +from .backbone import Backbone +from .fpn import FPN +from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage +from .swin_transformer import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] +# TODO can expose more resnet blocks after careful consideration diff --git a/src/sts/detectron2/modeling/backbone/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/modeling/backbone/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..174214b10ea794bac8c902f622825dc1603a8462 Binary files /dev/null and b/src/sts/detectron2/modeling/backbone/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/backbone/__pycache__/backbone.cpython-38.pyc b/src/sts/detectron2/modeling/backbone/__pycache__/backbone.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4bba7e3b6d9d07e27591d880178f89761baf3373 Binary files /dev/null and b/src/sts/detectron2/modeling/backbone/__pycache__/backbone.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/backbone/__pycache__/build.cpython-38.pyc b/src/sts/detectron2/modeling/backbone/__pycache__/build.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fea03ff8ef2f5a13c2731b93ccf8314e323147f3 Binary files /dev/null and b/src/sts/detectron2/modeling/backbone/__pycache__/build.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/backbone/__pycache__/fpn.cpython-38.pyc b/src/sts/detectron2/modeling/backbone/__pycache__/fpn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d15646e299175159478e9e704c3a85770efde531 Binary files /dev/null and b/src/sts/detectron2/modeling/backbone/__pycache__/fpn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/backbone/__pycache__/resnet.cpython-38.pyc b/src/sts/detectron2/modeling/backbone/__pycache__/resnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0416d589d457f75807523483a30ee6ba03e1384 Binary files /dev/null and b/src/sts/detectron2/modeling/backbone/__pycache__/resnet.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/backbone/__pycache__/swin_transformer.cpython-38.pyc b/src/sts/detectron2/modeling/backbone/__pycache__/swin_transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44bef354bd5ca84a2df82025afd5fdedfa1e7d7b Binary files /dev/null and b/src/sts/detectron2/modeling/backbone/__pycache__/swin_transformer.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/backbone/backbone.py b/src/sts/detectron2/modeling/backbone/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..369fb884930c5dd82f94024c45303dafaab14d66 --- /dev/null +++ b/src/sts/detectron2/modeling/backbone/backbone.py @@ -0,0 +1,53 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from abc import ABCMeta, abstractmethod +import torch.nn as nn + +from detectron2.layers import ShapeSpec + +__all__ = ["Backbone"] + + +class Backbone(nn.Module, metaclass=ABCMeta): + """ + Abstract base class for network backbones. + """ + + def __init__(self): + """ + The `__init__` method of any subclass can specify its own set of arguments. + """ + super().__init__() + + @abstractmethod + def forward(self): + """ + Subclasses must override this method, but adhere to the same return type. 
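+
+        For example, a minimal hypothetical subclass might do::
+
+            class ToyBackbone(Backbone):
+                def __init__(self):
+                    super().__init__()
+                    self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=4, padding=1)
+
+                def forward(self, x):
+                    # one entry per output feature name
+                    return {"toy": self.conv(x)}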
+ + Returns: + dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor + """ + pass + + @property + def size_divisibility(self) -> int: + """ + Some backbones require the input height and width to be divisible by a + specific integer. This is typically true for encoder / decoder type networks + with lateral connection (e.g., FPN) for which feature maps need to match + dimension in the "bottom up" and "top down" paths. Set to 0 if no specific + input size divisibility is required. + """ + return 0 + + def output_shape(self): + """ + Returns: + dict[str->ShapeSpec] + """ + # this is a backward-compatible default + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } diff --git a/src/sts/detectron2/modeling/backbone/build.py b/src/sts/detectron2/modeling/backbone/build.py new file mode 100644 index 0000000000000000000000000000000000000000..af02141172bebe9a2a27a88c81673c2710b4d73f --- /dev/null +++ b/src/sts/detectron2/modeling/backbone/build.py @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from detectron2.layers import ShapeSpec +from detectron2.utils.registry import Registry + +from .backbone import Backbone + +BACKBONE_REGISTRY = Registry("BACKBONE") +BACKBONE_REGISTRY.__doc__ = """ +Registry for backbones, which extract feature maps from images + +The registered object must be a callable that accepts two arguments: + +1. A :class:`detectron2.config.CfgNode` +2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. + +Registered object must return instance of :class:`Backbone`. +""" + + +def build_backbone(cfg, input_shape=None): + """ + Build a backbone from `cfg.MODEL.BACKBONE.NAME`. + + Returns: + an instance of :class:`Backbone` + """ + if input_shape is None: + input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) + + backbone_name = cfg.MODEL.BACKBONE.NAME + backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) + assert isinstance(backbone, Backbone) + return backbone diff --git a/src/sts/detectron2/modeling/backbone/fpn.py b/src/sts/detectron2/modeling/backbone/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..d0bdfc9da8cb7afc9ef421baef2c173a63ff1743 --- /dev/null +++ b/src/sts/detectron2/modeling/backbone/fpn.py @@ -0,0 +1,255 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import Conv2d, ShapeSpec, get_norm + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY +from .resnet import build_resnet_backbone + +__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] + + +class FPN(Backbone): + """ + This module implements :paper:`FPN`. + It creates pyramid features built on top of some input feature maps. + """ + + _fuse_type: torch.jit.Final[str] + + def __init__( + self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum" + ): + """ + Args: + bottom_up (Backbone): module representing the bottom up subnetwork. + Must be a subclass of :class:`Backbone`. The multi-scale feature + maps generated by the bottom up network, and listed in `in_features`, + are used to generate FPN levels. + in_features (list[str]): names of the input feature maps coming + from the backbone to which FPN is attached. 
For example, if the + backbone produces ["res2", "res3", "res4"], any *contiguous* sublist + of these may be used; order must be from high to low resolution. + out_channels (int): number of channels in the output feature maps. + norm (str): the normalization to use. + top_block (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + FPN output, and the result will extend the result list. The top_block + further downsamples the feature map. It must have an attribute + "num_levels", meaning the number of extra FPN levels added by + this block, and "in_feature", which is a string representing + its input feature (e.g., p5). + fuse_type (str): types for fusing the top down features and the lateral + ones. It can be "sum" (default), which sums up element-wise; or "avg", + which takes the element-wise mean of the two. + """ + super(FPN, self).__init__() + assert isinstance(bottom_up, Backbone) + assert in_features, in_features + + # Feature map strides and channels from the bottom up network (e.g. ResNet) + input_shapes = bottom_up.output_shape() + strides = [input_shapes[f].stride for f in in_features] + in_channels_per_feature = [input_shapes[f].channels for f in in_features] + + _assert_strides_are_log2_contiguous(strides) + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(in_channels_per_feature): + lateral_norm = get_norm(norm, out_channels) + output_norm = get_norm(norm, out_channels) + + lateral_conv = Conv2d( + in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + output_conv = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + ) + weight_init.c2_xavier_fill(lateral_conv) + weight_init.c2_xavier_fill(output_conv) + stage = int(math.log2(strides[idx])) + self.add_module("fpn_lateral{}".format(stage), lateral_conv) + self.add_module("fpn_output{}".format(stage), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. + self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + self.top_block = top_block + self.in_features = tuple(in_features) + self.bottom_up = bottom_up + # Return feature names are "p", like ["p2", "p3", ..., "p6"] + self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} + # top block output feature maps. + if self.top_block is not None: + for s in range(stage, stage + self.top_block.num_levels): + self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) + + self._out_features = list(self._out_feature_strides.keys()) + self._out_feature_channels = {k: out_channels for k in self._out_features} + self._size_divisibility = strides[-1] + assert fuse_type in {"avg", "sum"} + self._fuse_type = fuse_type + + @property + def size_divisibility(self): + return self._size_divisibility + + def forward(self, x): + """ + Args: + input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to + feature map tensor for each feature level in high to low resolution order. + + Returns: + dict[str->Tensor]: + mapping from feature map name to FPN feature map tensor + in high to low resolution order. Returned feature names follow the FPN + paper convention: "p", where stage has stride = 2 ** stage e.g., + ["p2", "p3", ..., "p6"]. 
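+
+            Illustrative shapes (assuming a standard ResNet bottom-up, a 224x224 input
+            and ``out_channels=256``): "p2" has stride 4 -> (N, 256, 56, 56),
+            "p3" stride 8 -> (N, 256, 28, 28), "p4" stride 16 -> (N, 256, 14, 14),
+            "p5" stride 32 -> (N, 256, 7, 7), plus any extra levels added by
+            ``top_block`` (e.g. "p6").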
+ """ + bottom_up_features = self.bottom_up(x) + results = [] + prev_features = self.lateral_convs[0](bottom_up_features[self.in_features[-1]]) + results.append(self.output_convs[0](prev_features)) + + # Reverse feature maps into top-down order (from low to high resolution) + for idx, (lateral_conv, output_conv) in enumerate( + zip(self.lateral_convs, self.output_convs) + ): + # Slicing of ModuleList is not supported https://github.com/pytorch/pytorch/issues/47336 + # Therefore we loop over all modules but skip the first one + if idx > 0: + features = self.in_features[-idx - 1] + features = bottom_up_features[features] + top_down_features = F.interpolate(prev_features, scale_factor=2.0, mode="nearest") + lateral_features = lateral_conv(features) + prev_features = lateral_features + top_down_features + if self._fuse_type == "avg": + prev_features /= 2 + results.insert(0, output_conv(prev_features)) + + if self.top_block is not None: + if self.top_block.in_feature in bottom_up_features: + top_block_in_feature = bottom_up_features[self.top_block.in_feature] + else: + top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] + results.extend(self.top_block(top_block_in_feature)) + assert len(self._out_features) == len(results) + return {f: res for f, res in zip(self._out_features, results)} + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + +def _assert_strides_are_log2_contiguous(strides): + """ + Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". + """ + for i, stride in enumerate(strides[1:], 1): + assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( + stride, strides[i - 1] + ) + + +class LastLevelMaxPool(nn.Module): + """ + This module is used in the original FPN to generate a downsampled + P6 feature from P5. + """ + + def __init__(self): + super().__init__() + self.num_levels = 1 + self.in_feature = "p5" + + def forward(self, x): + return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] + + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7 from + C5 feature. + """ + + def __init__(self, in_channels, out_channels, in_feature="res5"): + super().__init__() + self.num_levels = 2 + self.in_feature = in_feature + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + weight_init.c2_xavier_fill(module) + + def forward(self, c5): + p6 = self.p6(c5) + p7 = self.p7(F.relu(p6)) + return [p6, p7] + + +@BACKBONE_REGISTRY.register() +def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
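+
+        A minimal sketch of typical use (assuming the standard config keys read in the
+        body below)::
+
+            cfg.MODEL.BACKBONE.NAME = "build_retinanet_resnet_fpn_backbone"
+            backbone = build_backbone(cfg)   # dispatches here via BACKBONE_REGISTRY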
+ """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + in_channels_p6p7 = bottom_up.output_shape()["res5"].channels + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelP6P7(in_channels_p6p7, out_channels), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/src/sts/detectron2/modeling/backbone/fpn_swin.py b/src/sts/detectron2/modeling/backbone/fpn_swin.py new file mode 100644 index 0000000000000000000000000000000000000000..dc38cfe2a1e65a95ac86555e42d8182384345a44 --- /dev/null +++ b/src/sts/detectron2/modeling/backbone/fpn_swin.py @@ -0,0 +1,600 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import Conv2d, ShapeSpec, get_norm + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY +from .resnet import build_resnet_backbone +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +import numpy as np + +__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] + +class Mlp(nn.Module): + """ Multilayer perceptron.""" + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """ Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ Forward function. + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """ Swin Transformer Block. + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. 
Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. + """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + +class swin_layer(nn.Module): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. 
Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + +class FPN(Backbone): + """ + This module implements :paper:`FPN`. + It creates pyramid features built on top of some input feature maps. + """ + + _fuse_type: torch.jit.Final[str] + + def __init__( + self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum" + ): + """ + Args: + bottom_up (Backbone): module representing the bottom up subnetwork. + Must be a subclass of :class:`Backbone`. The multi-scale feature + maps generated by the bottom up network, and listed in `in_features`, + are used to generate FPN levels. + in_features (list[str]): names of the input feature maps coming + from the backbone to which FPN is attached. For example, if the + backbone produces ["res2", "res3", "res4"], any *contiguous* sublist + of these may be used; order must be from high to low resolution. + out_channels (int): number of channels in the output feature maps. 
+ norm (str): the normalization to use. + top_block (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + FPN output, and the result will extend the result list. The top_block + further downsamples the feature map. It must have an attribute + "num_levels", meaning the number of extra FPN levels added by + this block, and "in_feature", which is a string representing + its input feature (e.g., p5). + fuse_type (str): types for fusing the top down features and the lateral + ones. It can be "sum" (default), which sums up element-wise; or "avg", + which takes the element-wise mean of the two. + """ + super(FPN, self).__init__() + assert isinstance(bottom_up, Backbone) + assert in_features, in_features + + # Feature map strides and channels from the bottom up network (e.g. ResNet) + input_shapes = bottom_up.output_shape() + strides = [input_shapes[f].stride for f in in_features] + in_channels_per_feature = [input_shapes[f].channels for f in in_features] + + _assert_strides_are_log2_contiguous(strides) + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(in_channels_per_feature): + lateral_norm = get_norm(norm, out_channels) + output_norm = get_norm(norm, out_channels) + + lateral_conv = Conv2d( + in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + # output_conv = Conv2d( + # out_channels, + # out_channels, + # kernel_size=3, + # stride=1, + # padding=1, + # bias=use_bias, + # norm=output_norm, + # ) + output_conv = swin_layer( dim=out_channels, + depth=1, + num_heads=2, + window_size=7) + self.out_channels = out_channels + weight_init.c2_xavier_fill(lateral_conv) + # weight_init.c2_xavier_fill(output_conv) + stage = int(math.log2(strides[idx])) + self.add_module("fpn_lateral{}".format(stage), lateral_conv) + self.add_module("fpn_output{}".format(stage), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. + self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + self.top_block = top_block + self.in_features = tuple(in_features) + self.bottom_up = bottom_up + # Return feature names are "p", like ["p2", "p3", ..., "p6"] + self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} + # top block output feature maps. + if self.top_block is not None: + for s in range(stage, stage + self.top_block.num_levels): + self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) + + self._out_features = list(self._out_feature_strides.keys()) + self._out_feature_channels = {k: out_channels for k in self._out_features} + self._size_divisibility = strides[-1] + assert fuse_type in {"avg", "sum"} + self._fuse_type = fuse_type + + @property + def size_divisibility(self): + return self._size_divisibility + + def forward(self, x): + """ + Args: + input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to + feature map tensor for each feature level in high to low resolution order. + + Returns: + dict[str->Tensor]: + mapping from feature map name to FPN feature map tensor + in high to low resolution order. Returned feature names follow the FPN + paper convention: "p", where stage has stride = 2 ** stage e.g., + ["p2", "p3", ..., "p6"]. 
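+
+            Note on the implementation below: each output is produced by a
+            ``swin_layer`` instead of a 3x3 conv, so every (N, C, H, W) feature map is
+            flattened to (N, H*W, C) before the attention block and reshaped back to
+            (N, C, H, W) afterwards.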
+ """ + bottom_up_features = self.bottom_up(x) + results = [] + prev_features = self.lateral_convs[0](bottom_up_features[self.in_features[-1]]) + B, C, Wh, Ww = prev_features.size() + prev_features = prev_features.flatten(2).transpose(1, 2) + x_out, H, W, x, Wh, Ww = self.output_convs[0](prev_features, Wh, Ww) + prev_features = x_out.transpose(1, 2).view(-1, self.out_channels, Wh, Ww) + results.append(prev_features) + # Reverse feature maps into top-down order (from low to high resolution) + for idx, (lateral_conv, output_conv) in enumerate( + zip(self.lateral_convs, self.output_convs) + ): + # Slicing of ModuleList is not supported https://github.com/pytorch/pytorch/issues/47336 + # Therefore we loop over all modules but skip the first one + if idx > 0: + features = self.in_features[-idx - 1] + features = bottom_up_features[features] + top_down_features = F.interpolate(prev_features, scale_factor=2.0, mode="nearest") + lateral_features = lateral_conv(features) + prev_features = lateral_features + top_down_features + if self._fuse_type == "avg": + prev_features /= 2 + B, C, Wh, Ww = prev_features.size() + prev_features = prev_features.flatten(2).transpose(1, 2) + x_out, H, W, x, Wh, Ww = self.output_convs[0](prev_features, Wh, Ww) + prev_features = x_out.transpose(1, 2).view(-1, self.out_channels, Wh, Ww) + results.insert(0, prev_features) + + if self.top_block is not None: + if self.top_block.in_feature in bottom_up_features: + top_block_in_feature = bottom_up_features[self.top_block.in_feature] + else: + top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] + results.extend(self.top_block(top_block_in_feature)) + assert len(self._out_features) == len(results) + return {f: res for f, res in zip(self._out_features, results)} + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + +def _assert_strides_are_log2_contiguous(strides): + """ + Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". + """ + for i, stride in enumerate(strides[1:], 1): + assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( + stride, strides[i - 1] + ) + + +class LastLevelMaxPool(nn.Module): + """ + This module is used in the original FPN to generate a downsampled + P6 feature from P5. + """ + + def __init__(self): + super().__init__() + self.num_levels = 1 + self.in_feature = "p5" + + def forward(self, x): + return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] + + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7 from + C5 feature. + """ + + def __init__(self, in_channels, out_channels, in_feature="res5"): + super().__init__() + self.num_levels = 2 + self.in_feature = in_feature + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + weight_init.c2_xavier_fill(module) + + def forward(self, c5): + p6 = self.p6(c5) + p7 = self.p7(F.relu(p6)) + return [p6, p7] + + +@BACKBONE_REGISTRY.register() +def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
+ """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + in_channels_p6p7 = bottom_up.output_shape()["res5"].channels + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelP6P7(in_channels_p6p7, out_channels), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/src/sts/detectron2/modeling/backbone/resnet.py b/src/sts/detectron2/modeling/backbone/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..85bcf7f45b7e861ec027c6d677b28e7dd713931c --- /dev/null +++ b/src/sts/detectron2/modeling/backbone/resnet.py @@ -0,0 +1,693 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import ( + CNNBlockBase, + Conv2d, + DeformConv, + ModulatedDeformConv, + ShapeSpec, + get_norm, +) + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY + +__all__ = [ + "ResNetBlockBase", + "BasicBlock", + "BottleneckBlock", + "DeformBottleneckBlock", + "BasicStem", + "ResNet", + "make_stage", + "build_resnet_backbone", +] + + +class BasicBlock(CNNBlockBase): + """ + The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, + with two 3x3 conv layers and a projection shortcut if needed. + """ + + def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"): + """ + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + stride (int): Stride for the first conv. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. 
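+
+            Example (illustrative): ``BasicBlock(64, 128, stride=2, norm="BN")`` halves
+            the spatial resolution in ``conv1`` and, because ``in_channels`` differs
+            from ``out_channels``, adds a stride-2 1x1 projection shortcut.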
+ """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + self.conv2 = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + out = self.conv2(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BottleneckBlock(CNNBlockBase): + """ + The standard bottleneck residual block used by ResNet-50, 101 and 152 + defined in :paper:`ResNet`. It contains 3 conv layers with kernels + 1x1, 3x3, 1x1, and a projection shortcut if needed. + """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + ): + """ + Args: + bottleneck_channels (int): number of output channels for the 3x3 + "bottleneck" conv layers. + num_groups (int): number of groups for the 3x3 conv layer. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + stride_in_1x1 (bool): when stride>1, whether to put stride in the + first 1x1 convolution or the bottleneck 3x3 convolution. + dilation (int): the dilation rate of the 3x3 conv layer. + """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + # The original MSRA ResNet models have stride in the first 1x1 conv + # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have + # stride in the 3x3 conv + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv2 = Conv2d( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + # Zero-initialize the last normalization in each residual branch, + # so that at the beginning, the residual branch starts with zeros, + # and each residual block behaves like an identity. + # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "For BN layers, the learnable scaling coefficient γ is initialized + # to be 1, except for each residual block's last BN + # where γ is initialized to be 0." 
+ + # nn.init.constant_(self.conv3.norm.weight, 0) + # TODO this somehow hurts performance when training GN models from scratch. + # Add it as an option when we need to use this code to train a backbone. + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + out = self.conv2(out) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class DeformBottleneckBlock(CNNBlockBase): + """ + Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv ` + in the 3x3 convolution. + """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + deform_modulated=False, + deform_num_groups=1, + ): + super().__init__(in_channels, out_channels, stride) + self.deform_modulated = deform_modulated + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + if deform_modulated: + deform_conv_op = ModulatedDeformConv + # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size + offset_channels = 27 + else: + deform_conv_op = DeformConv + offset_channels = 18 + + self.conv2_offset = Conv2d( + bottleneck_channels, + offset_channels * deform_num_groups, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + dilation=dilation, + ) + self.conv2 = deform_conv_op( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + deformable_groups=deform_num_groups, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + nn.init.constant_(self.conv2_offset.weight, 0) + nn.init.constant_(self.conv2_offset.bias, 0) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + if self.deform_modulated: + offset_mask = self.conv2_offset(out) + offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) + offset = torch.cat((offset_x, offset_y), dim=1) + mask = mask.sigmoid() + out = self.conv2(out, offset, mask) + else: + offset = self.conv2_offset(out) + out = self.conv2(out, offset) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BasicStem(CNNBlockBase): + """ + The standard ResNet stem (layers before the first residual block). + """ + + def __init__(self, in_channels=3, out_channels=64, norm="BN"): + """ + Args: + norm (str or callable): norm after the first conv layer. + See :func:`layers.get_norm` for supported format. 
+ """ + super().__init__(in_channels, out_channels, 4) + self.in_channels = in_channels + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False, + norm=get_norm(norm, out_channels), + ) + weight_init.c2_msra_fill(self.conv1) + + def forward(self, x): + x = self.conv1(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +class ResNet(Backbone): + """ + Implement :paper:`ResNet`. + """ + + def __init__(self, stem, stages, num_classes=None, out_features=None, freeze_at=0): + """ + Args: + stem (nn.Module): a stem module + stages (list[list[CNNBlockBase]]): several (typically 4) stages, + each contains multiple :class:`CNNBlockBase`. + num_classes (None or int): if None, will not perform classification. + Otherwise, will create a linear layer. + out_features (list[str]): name of the layers whose outputs should + be returned in forward. Can be anything in "stem", "linear", or "res2" ... + If None, will return the output of the last layer. + freeze_at (int): The number of stages at the beginning to freeze. + see :meth:`freeze` for detailed explanation. + """ + super().__init__() + self.stem = stem + self.num_classes = num_classes + + current_stride = self.stem.stride + self._out_feature_strides = {"stem": current_stride} + self._out_feature_channels = {"stem": self.stem.out_channels} + + self.stage_names, self.stages = [], [] + + if out_features is not None: + # Avoid keeping unused layers in this module. They consume extra memory + # and may cause allreduce to fail + num_stages = max( + [{"res2": 1, "res3": 2, "res4": 3, "res5": 4}.get(f, 0) for f in out_features] + ) + stages = stages[:num_stages] + for i, blocks in enumerate(stages): + assert len(blocks) > 0, len(blocks) + for block in blocks: + assert isinstance(block, CNNBlockBase), block + + name = "res" + str(i + 2) + stage = nn.Sequential(*blocks) + + self.add_module(name, stage) + self.stage_names.append(name) + self.stages.append(stage) + + self._out_feature_strides[name] = current_stride = int( + current_stride * np.prod([k.stride for k in blocks]) + ) + self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels + self.stage_names = tuple(self.stage_names) # Make it static for scripting + + if num_classes is not None: + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.linear = nn.Linear(curr_channels, num_classes) + + # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "The 1000-way fully-connected layer is initialized by + # drawing weights from a zero-mean Gaussian with standard deviation of 0.01." + nn.init.normal_(self.linear.weight, std=0.01) + name = "linear" + + if out_features is None: + out_features = [name] + self._out_features = out_features + assert len(self._out_features) + children = [x[0] for x in self.named_children()] + for out_feature in self._out_features: + assert out_feature in children, "Available children: {}".format(", ".join(children)) + self.freeze(freeze_at) + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. + + Returns: + dict[str->Tensor]: names and the corresponding features + """ + assert x.dim() == 4, f"ResNet takes an input of shape (N, C, H, W). Got {x.shape} instead!" 
+ outputs = {} + x = self.stem(x) + if "stem" in self._out_features: + outputs["stem"] = x + for name, stage in zip(self.stage_names, self.stages): + x = stage(x) + if name in self._out_features: + outputs[name] = x + if self.num_classes is not None: + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.linear(x) + if "linear" in self._out_features: + outputs["linear"] = x + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + def freeze(self, freeze_at=0): + """ + Freeze the first several stages of the ResNet. Commonly used in + fine-tuning. + + Layers that produce the same feature map spatial size are defined as one + "stage" by :paper:`FPN`. + + Args: + freeze_at (int): number of stages to freeze. + `1` means freezing the stem. `2` means freezing the stem and + one residual stage, etc. + + Returns: + nn.Module: this ResNet itself + """ + if freeze_at >= 1: + self.stem.freeze() + for idx, stage in enumerate(self.stages, start=2): + if freeze_at >= idx: + for block in stage.children(): + block.freeze() + return self + + @staticmethod + def make_stage(block_class, num_blocks, *, in_channels, out_channels, **kwargs): + """ + Create a list of blocks of the same type that forms one ResNet stage. + + Args: + block_class (type): a subclass of CNNBlockBase that's used to create all blocks in this + stage. A module of this type must not change spatial resolution of inputs unless its + stride != 1. + num_blocks (int): number of blocks in this stage + in_channels (int): input channels of the entire stage. + out_channels (int): output channels of **every block** in the stage. + kwargs: other arguments passed to the constructor of + `block_class`. If the argument name is "xx_per_block", the + argument is a list of values to be passed to each block in the + stage. Otherwise, the same argument is passed to every block + in the stage. + + Returns: + list[CNNBlockBase]: a list of block module. + + Examples: + :: + stage = ResNet.make_stage( + BottleneckBlock, 3, in_channels=16, out_channels=64, + bottleneck_channels=16, num_groups=1, + stride_per_block=[2, 1, 1], + dilations_per_block=[1, 1, 2] + ) + + Usually, layers that produce the same feature map spatial size are defined as one + "stage" (in :paper:`FPN`). Under such definition, ``stride_per_block[1:]`` should + all be 1. + """ + blocks = [] + for i in range(num_blocks): + curr_kwargs = {} + for k, v in kwargs.items(): + if k.endswith("_per_block"): + assert len(v) == num_blocks, ( + f"Argument '{k}' of make_stage should have the " + f"same length as num_blocks={num_blocks}." + ) + newk = k[: -len("_per_block")] + assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!" + curr_kwargs[newk] = v[i] + else: + curr_kwargs[k] = v + + blocks.append( + block_class(in_channels=in_channels, out_channels=out_channels, **curr_kwargs) + ) + in_channels = out_channels + return blocks + + @staticmethod + def make_default_stages(depth, block_class=None, **kwargs): + """ + Created list of ResNet stages from pre-defined depth (one of 18, 34, 50, 101, 152). + If it doesn't create the ResNet variant you need, please use :meth:`make_stage` + instead for fine-grained customization. + + Args: + depth (int): depth of ResNet + block_class (type): the CNN block class. Has to accept + `bottleneck_channels` argument for depth > 50. + By default it is BasicBlock or BottleneckBlock, based on the + depth. 
+ kwargs: + other arguments to pass to `make_stage`. Should not contain + stride and channels, as they are predefined for each depth. + + Returns: + list[list[CNNBlockBase]]: modules in all stages; see arguments of + :class:`ResNet.__init__`. + """ + num_blocks_per_stage = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + }[depth] + if block_class is None: + block_class = BasicBlock if depth < 50 else BottleneckBlock + if depth < 50: + in_channels = [64, 64, 128, 256] + out_channels = [64, 128, 256, 512] + else: + in_channels = [64, 256, 512, 1024] + out_channels = [256, 512, 1024, 2048] + ret = [] + for (n, s, i, o) in zip(num_blocks_per_stage, [1, 2, 2, 2], in_channels, out_channels): + if depth >= 50: + kwargs["bottleneck_channels"] = o // 4 + ret.append( + ResNet.make_stage( + block_class=block_class, + num_blocks=n, + stride_per_block=[s] + [1] * (n - 1), + in_channels=i, + out_channels=o, + **kwargs, + ) + ) + return ret + + +ResNetBlockBase = CNNBlockBase +""" +Alias for backward compatibiltiy. +""" + + +def make_stage(*args, **kwargs): + """ + Deprecated alias for backward compatibiltiy. + """ + return ResNet.make_stage(*args, **kwargs) + + +@BACKBONE_REGISTRY.register() +def build_resnet_backbone(cfg, input_shape): + """ + Create a ResNet instance from config. + + Returns: + ResNet: a :class:`ResNet` instance. + """ + # need registration of new blocks/stems? + norm = cfg.MODEL.RESNETS.NORM + stem = BasicStem( + in_channels=input_shape.channels, + out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, + norm=norm, + ) + + # fmt: off + freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT + out_features = cfg.MODEL.RESNETS.OUT_FEATURES + depth = cfg.MODEL.RESNETS.DEPTH + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group + in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION + deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE + deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED + deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS + # fmt: on + assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) + + num_blocks_per_stage = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + }[depth] + + if depth in [18, 34]: + assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34" + assert not any( + deform_on_per_stage + ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34" + assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34" + assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34" + + stages = [] + + for idx, stage_idx in enumerate(range(2, 6)): + # res5_dilation is used this way as a convention in R-FCN & Deformable Conv paper + dilation = res5_dilation if stage_idx == 5 else 1 + first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 + stage_kargs = { + "num_blocks": num_blocks_per_stage[idx], + "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), + "in_channels": in_channels, + "out_channels": out_channels, + "norm": norm, + } + # Use BasicBlock for R18 and R34. 
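+        # (BasicBlock has no bottleneck, so the bottleneck/deformable kwargs below are
+        # only filled in for the 50/101/152 bottleneck variants.)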
+ if depth in [18, 34]: + stage_kargs["block_class"] = BasicBlock + else: + stage_kargs["bottleneck_channels"] = bottleneck_channels + stage_kargs["stride_in_1x1"] = stride_in_1x1 + stage_kargs["dilation"] = dilation + stage_kargs["num_groups"] = num_groups + if deform_on_per_stage[idx]: + stage_kargs["block_class"] = DeformBottleneckBlock + stage_kargs["deform_modulated"] = deform_modulated + stage_kargs["deform_num_groups"] = deform_num_groups + else: + stage_kargs["block_class"] = BottleneckBlock + blocks = ResNet.make_stage(**stage_kargs) + in_channels = out_channels + out_channels *= 2 + bottleneck_channels *= 2 + stages.append(blocks) + return ResNet(stem, stages, out_features=out_features, freeze_at=freeze_at) diff --git a/src/sts/detectron2/modeling/backbone/swin_transformer.py b/src/sts/detectron2/modeling/backbone/swin_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..c6dea49a72439a7909a4c880068432c34cec8452 --- /dev/null +++ b/src/sts/detectron2/modeling/backbone/swin_transformer.py @@ -0,0 +1,725 @@ +# -------------------------------------------------------- +# Swin Transformer +# modified from https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/master/mmdet/models/backbones/swin_transformer.py +# -------------------------------------------------------- + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +import numpy as np +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from . import Backbone +from .build import BACKBONE_REGISTRY +from .fpn import FPN, LastLevelMaxPool, LastLevelP6P7 +from detectron2.layers import ShapeSpec + + +class Mlp(nn.Module): + """ Multilayer perceptron.""" + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """ Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. 
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ Forward function. + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """ Swin Transformer Block. + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. 
Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. + """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchMerging(nn.Module): + """ Patch Merging Layer + Args: + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + def __init__(self, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x, H, W): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + x = x.view(B, H, W, C) + + # padding + pad_input = (H % 2 == 1) or (W % 2 == 1) + if pad_input: + x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + +class BasicLayer(nn.Module): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """ Forward function. + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. 
+ """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + Args: + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + patch_size = to_2tuple(patch_size) + self.patch_size = patch_size + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + """Forward function.""" + # padding + _, _, H, W = x.size() + if W % self.patch_size[1] != 0: + x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1])) + if H % self.patch_size[0] != 0: + x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) + + x = self.proj(x) # B C Wh Ww + if self.norm is not None: + Wh, Ww = x.size(2), x.size(3) + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) + + return x + + +class SwinTransformer(Backbone): + """ Swin Transformer backbone. + A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - + https://arxiv.org/pdf/2103.14030 + Args: + pretrain_img_size (int): Input image size for training the pretrained model, + used in absolute postion embedding. Default 224. + patch_size (int | tuple(int)): Patch size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + depths (tuple[int]): Depths of each Swin Transformer stage. + num_heads (tuple[int]): Number of attention head of each stage. + window_size (int): Window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): Dropout rate. + attn_drop_rate (float): Attention dropout rate. 
Default: 0. + drop_path_rate (float): Stochastic depth rate. Default: 0.2. + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False. + patch_norm (bool): If True, add normalization after patch embedding. Default: True. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + pretrain_img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + frozen_stages=-1, + use_checkpoint=False, + out_features=None): + super(SwinTransformer, self).__init__() + + self.pretrain_img_size = pretrain_img_size + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.frozen_stages = frozen_stages + + self.out_features = out_features + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + pretrain_img_size = to_2tuple(pretrain_img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [pretrain_img_size[0] // patch_size[0], pretrain_img_size[1] // patch_size[1]] + + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1])) + trunc_normal_(self.absolute_pos_embed, std=.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + self._out_feature_strides = {} + self._out_feature_channels = {} + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(embed_dim * 2 ** i_layer), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint) + self.layers.append(layer) + + stage = f'stage{i_layer+2}' + if stage in self.out_features: + self._out_feature_channels[stage] = embed_dim * 2 ** i_layer + self._out_feature_strides[stage] = 4 * 2 ** i_layer + + num_features = [int(embed_dim * 2 ** i) for i in range(self.num_layers)] + self.num_features = num_features + + # add a norm layer for each output + for i_layer in range(self.num_layers): + stage = f'stage{i_layer+2}' + if stage in self.out_features: + layer = norm_layer(num_features[i_layer]) + layer_name = f'norm{i_layer}' + self.add_module(layer_name, layer) + + self._freeze_stages() + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + + if self.frozen_stages >= 1 and self.ape: + self.absolute_pos_embed.requires_grad = False + + if self.frozen_stages >= 2: + 
self.pos_drop.eval() + for i in range(0, self.frozen_stages - 1): + m = self.layers[i] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) + + def forward(self, x): + """Forward function.""" + x = self.patch_embed(x) + + Wh, Ww = x.size(2), x.size(3) + if self.ape: + # interpolate the position embedding to the corresponding size + absolute_pos_embed = F.interpolate(self.absolute_pos_embed, size=(Wh, Ww), mode='bicubic') + x = (x + absolute_pos_embed).flatten(2).transpose(1, 2) # B Wh*Ww C + else: + x = x.flatten(2).transpose(1, 2) + x = self.pos_drop(x) + + outs = {} + for i in range(self.num_layers): + layer = self.layers[i] + x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww) + name = f'stage{i+2}' + if name in self.out_features: + norm_layer = getattr(self, f'norm{i}') + x_out = norm_layer(x_out) + out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous() + outs[name] = out + + return outs #{"stage%d" % (i+2,): out for i, out in enumerate(outs)} #tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep layers freezed.""" + super(SwinTransformer, self).train(mode) + self._freeze_stages() + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self.out_features + } + +@BACKBONE_REGISTRY.register() +def build_swint_backbone(cfg, input_shape): + """ + Create a SwinT instance from config. + + Returns: + VoVNet: a :class:`VoVNet` instance. + """ + out_features = cfg.MODEL.SWINT.OUT_FEATURES + + return SwinTransformer( + patch_size=4, + in_chans=input_shape.channels, + embed_dim=cfg.MODEL.SWINT.EMBED_DIM, + depths=cfg.MODEL.SWINT.DEPTHS, + num_heads=cfg.MODEL.SWINT.NUM_HEADS, + window_size=cfg.MODEL.SWINT.WINDOW_SIZE, + mlp_ratio=cfg.MODEL.SWINT.MLP_RATIO, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=cfg.MODEL.SWINT.DROP_PATH_RATE, + norm_layer=nn.LayerNorm, + ape=cfg.MODEL.SWINT.APE, + patch_norm=True, + frozen_stages=cfg.MODEL.BACKBONE.FREEZE_AT, + out_features=out_features + ) + + +@BACKBONE_REGISTRY.register() +def build_swint_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
+ """ + bottom_up = build_swint_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + +class LastLevelP6(nn.Module): + """ + This module is used in FCOS to generate extra layers + """ + + def __init__(self, in_channels, out_channels, in_features="res5"): + super().__init__() + self.num_levels = 1 + self.in_feature = in_features + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + for module in [self.p6]: + weight_init.c2_xavier_fill(module) + + def forward(self, x): + p6 = self.p6(x) + return [p6] + +@BACKBONE_REGISTRY.register() +def build_retinanet_swint_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_swint_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + top_levels = cfg.MODEL.FPN.TOP_LEVELS + in_channels_top = out_channels + if top_levels == 2: + top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") + if top_levels == 1: + top_block = LastLevelP6(in_channels_top, out_channels, "p5") + elif top_levels == 0: + top_block = None + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=top_block, + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/src/sts/detectron2/modeling/box_regression.py b/src/sts/detectron2/modeling/box_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..12be0008b66bd4954a5139aeb6e07d71f8159caa --- /dev/null +++ b/src/sts/detectron2/modeling/box_regression.py @@ -0,0 +1,270 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List, Tuple +import torch +from fvcore.nn import giou_loss, smooth_l1_loss + +from detectron2.layers import cat +from detectron2.structures import Boxes + +# Value for clamping large dw and dh predictions. The heuristic is that we clamp +# such that dw and dh are no larger than what would transform a 16px box into a +# 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). +_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) + + +__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated"] + + +@torch.jit.script +class Box2BoxTransform(object): + """ + The box-to-box transform defined in R-CNN. The transformation is parameterized + by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height + by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). + """ + + def __init__( + self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP + ): + """ + Args: + weights (4-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set + such that the deltas have unit variance; now they are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. 
+ """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): source boxes, e.g., object proposals + target_boxes (Tensor): target of the transformation, e.g., ground-truth + boxes. + """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_widths = src_boxes[:, 2] - src_boxes[:, 0] + src_heights = src_boxes[:, 3] - src_boxes[:, 1] + src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths + src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights + + target_widths = target_boxes[:, 2] - target_boxes[:, 0] + target_heights = target_boxes[:, 3] - target_boxes[:, 1] + target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths + target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights + + wx, wy, ww, wh = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + + deltas = torch.stack((dx, dy, dw, dh), dim=1) + assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. + deltas[i] represents k potentially different class-specific + box transformations for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 4) + """ + deltas = deltas.float() # ensure fp32 for decoding precision + boxes = boxes.to(deltas.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights + dx = deltas[:, 0::4] / wx + dy = deltas[:, 1::4] / wy + dw = deltas[:, 2::4] / ww + dh = deltas[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + x1 = pred_ctr_x - 0.5 * pred_w + y1 = pred_ctr_y - 0.5 * pred_h + x2 = pred_ctr_x + 0.5 * pred_w + y2 = pred_ctr_y + 0.5 * pred_h + pred_boxes = torch.stack((x1, y1, x2, y2), dim=-1) + return pred_boxes.reshape(deltas.shape) + + +@torch.jit.script +class Box2BoxTransformRotated(object): + """ + The box-to-box transform defined in Rotated R-CNN. The transformation is parameterized + by 5 deltas: (dx, dy, dw, dh, da). The transformation scales the box's width and height + by exp(dw), exp(dh), shifts a box's center by the offset (dx * width, dy * height), + and rotate a box's angle by da (radians). + Note: angles of deltas are in radians while angles of boxes are in degrees. + """ + + def __init__( + self, + weights: Tuple[float, float, float, float, float], + scale_clamp: float = _DEFAULT_SCALE_CLAMP, + ): + """ + Args: + weights (5-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh, da) deltas. 
These are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. + """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh, da) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): Nx5 source boxes, e.g., object proposals + target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth + boxes. + """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_ctr_x, src_ctr_y, src_widths, src_heights, src_angles = torch.unbind(src_boxes, dim=1) + + target_ctr_x, target_ctr_y, target_widths, target_heights, target_angles = torch.unbind( + target_boxes, dim=1 + ) + + wx, wy, ww, wh, wa = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + # Angles of deltas are in radians while angles of boxes are in degrees. + # the conversion to radians serve as a way to normalize the values + da = target_angles - src_angles + da = (da + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + da *= wa * math.pi / 180.0 + + deltas = torch.stack((dx, dy, dw, dh, da), dim=1) + assert ( + (src_widths > 0).all().item() + ), "Input boxes to Box2BoxTransformRotated are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*5). + deltas[i] represents box transformation for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 5) + """ + assert deltas.shape[1] % 5 == 0 and boxes.shape[1] == 5 + + boxes = boxes.to(deltas.dtype).unsqueeze(2) + + ctr_x = boxes[:, 0] + ctr_y = boxes[:, 1] + widths = boxes[:, 2] + heights = boxes[:, 3] + angles = boxes[:, 4] + + wx, wy, ww, wh, wa = self.weights + + dx = deltas[:, 0::5] / wx + dy = deltas[:, 1::5] / wy + dw = deltas[:, 2::5] / ww + dh = deltas[:, 3::5] / wh + da = deltas[:, 4::5] / wa + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::5] = dx * widths + ctr_x # x_ctr + pred_boxes[:, 1::5] = dy * heights + ctr_y # y_ctr + pred_boxes[:, 2::5] = torch.exp(dw) * widths # width + pred_boxes[:, 3::5] = torch.exp(dh) * heights # height + + # Following original RRPN implementation, + # angles of deltas are in radians while angles of boxes are in degrees. + pred_angle = da * 180.0 / math.pi + angles + pred_angle = (pred_angle + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + + pred_boxes[:, 4::5] = pred_angle + + return pred_boxes + + +def _dense_box_regression_loss( + anchors: List[Boxes], + box2box_transform: Box2BoxTransform, + pred_anchor_deltas: List[torch.Tensor], + gt_boxes: List[torch.Tensor], + fg_mask: torch.Tensor, + box_reg_loss_type="smooth_l1", + smooth_l1_beta=0.0, +): + """ + Compute loss for dense multi-level box regression. 
+ Loss is accumulated over ``fg_mask``. + + Args: + anchors: #lvl anchor boxes, each is (HixWixA, 4) + pred_anchor_deltas: #lvl predictions, each is (N, HixWixA, 4) + gt_boxes: N ground truth boxes, each has shape (R, 4) (R = sum(Hi * Wi * A)) + fg_mask: the foreground boolean mask of shape (N, R) to compute loss on + box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou". + smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to + use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1" + """ + anchors = type(anchors[0]).cat(anchors).tensor # (R, 4) + if box_reg_loss_type == "smooth_l1": + gt_anchor_deltas = [box2box_transform.get_deltas(anchors, k) for k in gt_boxes] + gt_anchor_deltas = torch.stack(gt_anchor_deltas) # (N, R, 4) + loss_box_reg = smooth_l1_loss( + cat(pred_anchor_deltas, dim=1)[fg_mask], + gt_anchor_deltas[fg_mask], + beta=smooth_l1_beta, + reduction="sum", + ) + elif box_reg_loss_type == "giou": + pred_boxes = [ + box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) + ] + loss_box_reg = giou_loss( + torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" + ) + else: + raise ValueError(f"Invalid dense box regression loss type '{box_reg_loss_type}'") + return loss_box_reg diff --git a/src/sts/detectron2/modeling/matcher.py b/src/sts/detectron2/modeling/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..666913f76fb0b9d8a277541716f91872d8246250 --- /dev/null +++ b/src/sts/detectron2/modeling/matcher.py @@ -0,0 +1,126 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch + +from detectron2.layers import nonzero_tuple + + +class Matcher(object): + """ + This class assigns to each predicted "element" (e.g., a box) a ground-truth + element. Each predicted element will have exactly zero or one matches; each + ground-truth element may be matched to zero or more predicted elements. + + The matching is determined by the MxN match_quality_matrix, that characterizes + how well each (ground-truth, prediction)-pair match each other. For example, + if the elements are boxes, this matrix may contain box intersection-over-union + overlap values. + + The matcher returns (a) a vector of length N containing the index of the + ground-truth element m in [0, M) that matches to prediction n in [0, N). + (b) a vector of length N containing the labels for each prediction. + """ + + def __init__( + self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False + ): + """ + Args: + thresholds (list): a list of thresholds used to stratify predictions + into levels. + labels (list): a list of values to label predictions belonging at + each level. A label can be one of {-1, 0, 1} signifying + {ignore, negative class, positive class}, respectively. + allow_low_quality_matches (bool): if True, produce additional matches + for predictions with maximum match quality lower than high_threshold. + See set_low_quality_matches_ for more details. + + For example, + thresholds = [0.3, 0.5] + labels = [0, -1, 1] + All predictions with iou < 0.3 will be marked with 0 and + thus will be considered as false positives while training. + All predictions with 0.3 <= iou < 0.5 will be marked with -1 and + thus will be ignored. + All predictions with 0.5 <= iou will be marked with 1 and + thus will be considered as true positives. 
+ """ + # Add -inf and +inf to first and last position in thresholds + thresholds = thresholds[:] + assert thresholds[0] > 0 + thresholds.insert(0, -float("inf")) + thresholds.append(float("inf")) + # Currently torchscript does not support all + generator + assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) + assert all([l in [-1, 0, 1] for l in labels]) + assert len(labels) == len(thresholds) - 1 + self.thresholds = thresholds + self.labels = labels + self.allow_low_quality_matches = allow_low_quality_matches + + def __call__(self, match_quality_matrix): + """ + Args: + match_quality_matrix (Tensor[float]): an MxN tensor, containing the + pairwise quality between M ground-truth elements and N predicted + elements. All elements must be >= 0 (due to the us of `torch.nonzero` + for selecting indices in :meth:`set_low_quality_matches_`). + + Returns: + matches (Tensor[int64]): a vector of length N, where matches[i] is a matched + ground-truth index in [0, M) + match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates + whether a prediction is a true or false positive or ignored + """ + assert match_quality_matrix.dim() == 2 + if match_quality_matrix.numel() == 0: + default_matches = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), 0, dtype=torch.int64 + ) + # When no gt boxes exist, we define IOU = 0 and therefore set labels + # to `self.labels[0]`, which usually defaults to background class 0 + # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds + default_match_labels = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 + ) + return default_matches, default_match_labels + + assert torch.all(match_quality_matrix >= 0) + + # match_quality_matrix is M (gt) x N (predicted) + # Max over gt elements (dim 0) to find best gt candidate for each prediction + matched_vals, matches = match_quality_matrix.max(dim=0) + + match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) + + for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): + low_high = (matched_vals >= low) & (matched_vals < high) + match_labels[low_high] = l + + if self.allow_low_quality_matches: + self.set_low_quality_matches_(match_labels, match_quality_matrix) + + return matches, match_labels + + def set_low_quality_matches_(self, match_labels, match_quality_matrix): + """ + Produce additional matches for predictions that have only low-quality matches. + Specifically, for each ground-truth G find the set of predictions that have + maximum overlap with it (including ties); for each prediction in that set, if + it is unmatched, then match it to the ground-truth G. + + This function implements the RPN assignment case (i) in Sec. 3.1.2 of + :paper:`Faster R-CNN`. + """ + # For each gt, find the prediction with which it has highest quality + highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) + # Find the highest quality match available, even if it is low, including ties. + # Note that the matches qualities must be positive due to the use of + # `torch.nonzero`. + _, pred_inds_with_highest_quality = nonzero_tuple( + match_quality_matrix == highest_quality_foreach_gt[:, None] + ) + # If an anchor was labeled positive only due to a low-quality match + # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. + # This follows the implementation in Detectron, and is found to have no significant impact. 
+ match_labels[pred_inds_with_highest_quality] = 1 diff --git a/src/sts/detectron2/modeling/meta_arch/__init__.py b/src/sts/detectron2/modeling/meta_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cb86ccfd7415db649fa414507992497e542c0b --- /dev/null +++ b/src/sts/detectron2/modeling/meta_arch/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +from .build import META_ARCH_REGISTRY, build_model # isort:skip + +from .panoptic_fpn import PanopticFPN + +# import all the meta_arch, so they will be registered +from .rcnn import GeneralizedRCNN, ProposalNetwork +from .retinanet import RetinaNet +from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head + + +__all__ = list(globals().keys()) diff --git a/src/sts/detectron2/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89c072a296941608cd4355ac89585fba6205e372 Binary files /dev/null and b/src/sts/detectron2/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/meta_arch/__pycache__/build.cpython-38.pyc b/src/sts/detectron2/modeling/meta_arch/__pycache__/build.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ced6d23ecf978a9194605a9fe325723a1ddcbe6 Binary files /dev/null and b/src/sts/detectron2/modeling/meta_arch/__pycache__/build.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/meta_arch/__pycache__/panoptic_fpn.cpython-38.pyc b/src/sts/detectron2/modeling/meta_arch/__pycache__/panoptic_fpn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..090fa22a7734e190a044e0275de9b223cbfe5c61 Binary files /dev/null and b/src/sts/detectron2/modeling/meta_arch/__pycache__/panoptic_fpn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/meta_arch/__pycache__/rcnn.cpython-38.pyc b/src/sts/detectron2/modeling/meta_arch/__pycache__/rcnn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01c9a550ad4a297c555b573ec341e0143b258069 Binary files /dev/null and b/src/sts/detectron2/modeling/meta_arch/__pycache__/rcnn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/meta_arch/__pycache__/retinanet.cpython-38.pyc b/src/sts/detectron2/modeling/meta_arch/__pycache__/retinanet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb61fe42b05f47afd29d33fd2175d55bd0b12653 Binary files /dev/null and b/src/sts/detectron2/modeling/meta_arch/__pycache__/retinanet.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/meta_arch/__pycache__/semantic_seg.cpython-38.pyc b/src/sts/detectron2/modeling/meta_arch/__pycache__/semantic_seg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b405b34bcb74e1c0416716d79731abf63ef4c8b Binary files /dev/null and b/src/sts/detectron2/modeling/meta_arch/__pycache__/semantic_seg.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/meta_arch/build.py b/src/sts/detectron2/modeling/meta_arch/build.py new file mode 100644 index 0000000000000000000000000000000000000000..3427215746c9a146bd902f22ea9b26d121c36b27 --- /dev/null +++ b/src/sts/detectron2/modeling/meta_arch/build.py @@ -0,0 +1,25 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
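+# Example (sketch): building a model from a config. This assumes the standard
+# detectron2 defaults (MODEL.META_ARCHITECTURE="GeneralizedRCNN"); build_model
+# constructs the module but does not load any weights.
+#
+#   from detectron2.config import get_cfg
+#   from detectron2.modeling import build_model
+#
+#   cfg = get_cfg()
+#   cfg.MODEL.DEVICE = "cpu"   # keep the sketch runnable without a GPU
+#   model = build_model(cfg)   # an nn.Module placed on cfg.MODEL.DEVICE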
+import torch + +from detectron2.utils.logger import _log_api_usage +from detectron2.utils.registry import Registry + +META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip +META_ARCH_REGISTRY.__doc__ = """ +Registry for meta-architectures, i.e. the whole model. + +The registered object will be called with `obj(cfg)` +and expected to return a `nn.Module` object. +""" + + +def build_model(cfg): + """ + Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. + Note that it does not load any weights from ``cfg``. + """ + meta_arch = cfg.MODEL.META_ARCHITECTURE + model = META_ARCH_REGISTRY.get(meta_arch)(cfg) + model.to(torch.device(cfg.MODEL.DEVICE)) + _log_api_usage("modeling.meta_arch." + meta_arch) + return model diff --git a/src/sts/detectron2/modeling/meta_arch/panoptic_fpn.py b/src/sts/detectron2/modeling/meta_arch/panoptic_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..7d18f3d64ac0f4e988ead4870f8b0f65f894a36b --- /dev/null +++ b/src/sts/detectron2/modeling/meta_arch/panoptic_fpn.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +from typing import Dict, Tuple +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.structures import ImageList + +from ..postprocessing import detector_postprocess, sem_seg_postprocess +from .build import META_ARCH_REGISTRY +from .rcnn import GeneralizedRCNN +from .semantic_seg import build_sem_seg_head + +__all__ = ["PanopticFPN"] + + +@META_ARCH_REGISTRY.register() +class PanopticFPN(GeneralizedRCNN): + """ + Implement the paper :paper:`PanopticFPN`. + """ + + @configurable + def __init__( + self, + *, + sem_seg_head: nn.Module, + combine_overlap_thresh: float = 0.5, + combine_stuff_area_thresh: float = 4096, + combine_instances_score_thresh: float = 0.5, + **kwargs + ): + """ + NOTE: this interface is experimental. + + Args: + sem_seg_head: a module for the semantic segmentation head. + combine_overlap_thresh: combine masks into one instances if + they have enough overlap + combine_stuff_area_thresh: ignore stuff areas smaller than this threshold + combine_instances_score_thresh: ignore instances whose score is + smaller than this threshold + + Other arguments are the same as :class:`GeneralizedRCNN`. + """ + super().__init__(**kwargs) + self.sem_seg_head = sem_seg_head + # options when combining instance & semantic outputs + self.combine_overlap_thresh = combine_overlap_thresh + self.combine_stuff_area_thresh = combine_stuff_area_thresh + self.combine_instances_score_thresh = combine_instances_score_thresh + + @classmethod + def from_config(cls, cfg): + ret = super().from_config(cfg) + ret.update( + { + "combine_overlap_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH, + "combine_stuff_area_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT, + "combine_instances_score_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH, # noqa + } + ) + ret["sem_seg_head"] = build_sem_seg_head(cfg, ret["backbone"].output_shape()) + logger = logging.getLogger(__name__) + if not cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED: + logger.warning( + "PANOPTIC_FPN.COMBINED.ENABLED is no longer used. " + " model.inference(do_postprocess=) should be used to toggle postprocessing." 
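`build_model` above is just a registry lookup plus a device move, which is what makes custom meta-architectures pluggable. A sketch of the registration pattern, where `TinyBaseline` and its config values are made up for illustration and not part of this repo:

```python
from torch import nn
from detectron2.config import get_cfg
from detectron2.modeling import META_ARCH_REGISTRY, build_model

@META_ARCH_REGISTRY.register()
class TinyBaseline(nn.Module):
    """Toy meta-architecture: one conv producing a score map per input image."""

    def __init__(self, cfg):
        super().__init__()
        self.conv = nn.Conv2d(3, 1, kernel_size=3, padding=1)

    def forward(self, batched_inputs):
        # real meta-architectures return much richer dicts (losses / Instances)
        return [{"scores": self.conv(x["image"].float()[None])} for x in batched_inputs]

cfg = get_cfg()
cfg.MODEL.META_ARCHITECTURE = "TinyBaseline"   # name used for the registry lookup
cfg.MODEL.DEVICE = "cpu"
model = build_model(cfg)                       # instantiates TinyBaseline(cfg); no weights loaded
```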
+ ) + if cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT != 1.0: + w = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT + logger.warning( + "PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT should be replaced by weights on each ROI head." + ) + + def update_weight(x): + if isinstance(x, dict): + return {k: v * w for k, v in x.items()} + else: + return x * w + + roi_heads = ret["roi_heads"] + roi_heads.box_predictor.loss_weight = update_weight(roi_heads.box_predictor.loss_weight) + roi_heads.mask_head.loss_weight = update_weight(roi_heads.mask_head.loss_weight) + return ret + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "instances": Instances + * "sem_seg": semantic segmentation ground truth. + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + each dict has the results for one image. The dict contains the following keys: + + * "instances": see :meth:`GeneralizedRCNN.forward` for its format. + * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. + * "panoptic_seg": See the return value of + :func:`combine_semantic_and_instance_outputs` for its format. + """ + if not self.training: + return self.inference(batched_inputs) + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + + assert "sem_seg" in batched_inputs[0] + gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] + gt_sem_seg = ImageList.from_tensors( + gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value + ).tensor + sem_seg_results, sem_seg_losses = self.sem_seg_head(features, gt_sem_seg) + + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + detector_results, detector_losses = self.roi_heads( + images, features, proposals, gt_instances + ) + + losses = sem_seg_losses + losses.update(proposal_losses) + losses.update(detector_losses) + return losses + + def inference( + self, batched_inputs: Tuple[Dict[str, torch.Tensor]], do_postprocess: bool = True + ): + """ + Run inference on the given inputs. + + Args: + batched_inputs (list[dict]): same as in :meth:`forward` + do_postprocess (bool): whether to apply post-processing on the outputs. + + Returns: + When do_postprocess=True, see docs in :meth:`forward`. + Otherwise, returns a (list[Instances], list[Tensor]) that contains + the raw detector outputs, and raw semantic segmentation outputs. 
+ """ + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + sem_seg_results, sem_seg_losses = self.sem_seg_head(features, None) + proposals, _ = self.proposal_generator(images, features, None) + detector_results, _ = self.roi_heads(images, features, proposals, None) + + if do_postprocess: + processed_results = [] + for sem_seg_result, detector_result, input_per_image, image_size in zip( + sem_seg_results, detector_results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) + detector_r = detector_postprocess(detector_result, height, width) + + processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) + + panoptic_r = combine_semantic_and_instance_outputs( + detector_r, + sem_seg_r.argmax(dim=0), + self.combine_overlap_thresh, + self.combine_stuff_area_thresh, + self.combine_instances_score_thresh, + ) + processed_results[-1]["panoptic_seg"] = panoptic_r + return processed_results + else: + return detector_results, sem_seg_results + + +def combine_semantic_and_instance_outputs( + instance_results, + semantic_results, + overlap_threshold, + stuff_area_thresh, + instances_score_thresh, +): + """ + Implement a simple combining logic following + "combine_semantic_and_instance_predictions.py" in panopticapi + to produce panoptic segmentation outputs. + + Args: + instance_results: output of :func:`detector_postprocess`. + semantic_results: an (H, W) tensor, each element is the contiguous semantic + category id + + Returns: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". 
+ """ + panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32) + + # sort instance outputs by scores + sorted_inds = torch.argsort(-instance_results.scores) + + current_segment_id = 0 + segments_info = [] + + instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device) + + # Add instances one-by-one, check for overlaps with existing ones + for inst_id in sorted_inds: + score = instance_results.scores[inst_id].item() + if score < instances_score_thresh: + break + mask = instance_masks[inst_id] # H,W + mask_area = mask.sum().item() + + if mask_area == 0: + continue + + intersect = (mask > 0) & (panoptic_seg > 0) + intersect_area = intersect.sum().item() + + if intersect_area * 1.0 / mask_area > overlap_threshold: + continue + + if intersect_area > 0: + mask = mask & (panoptic_seg == 0) + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": True, + "score": score, + "category_id": instance_results.pred_classes[inst_id].item(), + "instance_id": inst_id.item(), + } + ) + + # Add semantic results to remaining empty areas + semantic_labels = torch.unique(semantic_results).cpu().tolist() + for semantic_label in semantic_labels: + if semantic_label == 0: # 0 is a special "thing" class + continue + mask = (semantic_results == semantic_label) & (panoptic_seg == 0) + mask_area = mask.sum().item() + if mask_area < stuff_area_thresh: + continue + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": False, + "category_id": semantic_label, + "area": mask_area, + } + ) + + return panoptic_seg, segments_info diff --git a/src/sts/detectron2/modeling/meta_arch/rcnn.py b/src/sts/detectron2/modeling/meta_arch/rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..83e5c187cf420a417dc067f2fcf92d48fb05a666 --- /dev/null +++ b/src/sts/detectron2/modeling/meta_arch/rcnn.py @@ -0,0 +1,327 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +from typing import Dict, List, Optional, Tuple +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.data.detection_utils import convert_image_to_rgb +from detectron2.structures import ImageList, Instances +from detectron2.utils.events import get_event_storage +from detectron2.utils.logger import log_first_n + +from ..backbone import Backbone, build_backbone +from ..postprocessing import detector_postprocess +from ..proposal_generator import build_proposal_generator +from ..roi_heads import build_roi_heads +from .build import META_ARCH_REGISTRY + +__all__ = ["GeneralizedRCNN", "ProposalNetwork"] + + +@META_ARCH_REGISTRY.register() +class GeneralizedRCNN(nn.Module): + """ + Generalized R-CNN. Any models that contains the following three components: + 1. Per-image feature extraction (aka backbone) + 2. Region proposal generation + 3. 
Per-region feature extraction and prediction + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + proposal_generator: nn.Module, + roi_heads: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + input_format: Optional[str] = None, + vis_period: int = 0, + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + proposal_generator: a module that generates proposals using backbone features + roi_heads: a ROI head that performs per-region computation + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + input_format: describe the meaning of channels of input. Needed by visualization + vis_period: the period to run visualization. Set to 0 to disable. + """ + super().__init__() + self.backbone = backbone + self.proposal_generator = proposal_generator + self.roi_heads = roi_heads + + self.input_format = input_format + self.vis_period = vis_period + if vis_period > 0: + assert input_format is not None, "input_format is required for visualization!" + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + assert ( + self.pixel_mean.shape == self.pixel_std.shape + ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + return { + "backbone": backbone, + "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), + "roi_heads": build_roi_heads(cfg, backbone.output_shape()), + "input_format": cfg.INPUT.FORMAT, + "vis_period": cfg.VIS_PERIOD, + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def visualize_training(self, batched_inputs, proposals): + """ + A function used to visualize images and proposals. It shows ground truth + bounding boxes on the original image and up to 20 top-scoring predicted + object proposals on the original image. Users can implement different + visualization functions for different models. + + Args: + batched_inputs (list): a list that contains input to the model. + proposals (list): a list that contains predicted proposals. Both + batched_inputs and proposals should have the same length. + """ + from detectron2.utils.visualizer import Visualizer + + storage = get_event_storage() + max_vis_prop = 20 + + for input, prop in zip(batched_inputs, proposals): + img = input["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) + anno_img = v_gt.get_image() + box_size = min(len(prop.proposal_boxes), max_vis_prop) + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances( + boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() + ) + prop_img = v_pred.get_image() + vis_img = np.concatenate((anno_img, prop_img), axis=1) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = "Left: GT bounding boxes; Right: Predicted proposals" + storage.put_image(vis_name, vis_img) + break # only visualize one image in a batch + + def forward(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. 
+ For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances (optional): groundtruth :class:`Instances` + * proposals (optional): :class:`Instances`, precomputed proposals. + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "instances" whose value is a :class:`Instances`. + The :class:`Instances` object has the following keys: + "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" + """ + if not self.training: + return self.inference(batched_inputs) + + images = self.preprocess_image(batched_inputs) + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + + features = self.backbone(images.tensor) + + if self.proposal_generator is not None: + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + proposal_losses = {} + + _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) + if self.vis_period > 0: + storage = get_event_storage() + if storage.iter % self.vis_period == 0: + self.visualize_training(batched_inputs, proposals) + + losses = {} + losses.update(detector_losses) + losses.update(proposal_losses) + return losses + + def inference( + self, + batched_inputs: Tuple[Dict[str, torch.Tensor]], + detected_instances: Optional[List[Instances]] = None, + do_postprocess: bool = True, + ): + """ + Run inference on the given inputs. + + Args: + batched_inputs (list[dict]): same as in :meth:`forward` + detected_instances (None or list[Instances]): if not None, it + contains an `Instances` object per image. The `Instances` + object contains "pred_boxes" and "pred_classes" which are + known boxes in the image. + The inference will then skip the detection of bounding boxes, + and only predict other per-ROI outputs. + do_postprocess (bool): whether to apply post-processing on the outputs. + + Returns: + When do_postprocess=True, same as in :meth:`forward`. + Otherwise, a list[Instances] containing raw network outputs. + """ + assert not self.training + + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + + if detected_instances is None: + if self.proposal_generator is not None: + proposals, _ = self.proposal_generator(images, features, None) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + + results, _ = self.roi_heads(images, features, proposals, None) + else: + detected_instances = [x.to(self.device) for x in detected_instances] + results = self.roi_heads.forward_with_given_boxes(features, detected_instances) + + if do_postprocess: + assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess." + return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) + else: + return results + + def preprocess_image(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]): + """ + Normalize, pad and batch the input images. 
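A minimal sketch of the inference-time `batched_inputs` format described above; the image tensor is random, and the model call is shown commented out because it assumes an already-built `GeneralizedRCNN` instance:

```python
import torch

image = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8)    # (C, H, W), BGR by default
batched_inputs = [{"image": image, "height": 480, "width": 640}]   # output rescaled to height x width

# model.eval()
# with torch.no_grad():
#     outputs = model(batched_inputs)        # list[dict], one entry per image
# instances = outputs[0]["instances"]        # pred_boxes, scores, pred_classes, (pred_masks)
```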
+ """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + return images + + @staticmethod + def _postprocess(instances, batched_inputs: Tuple[Dict[str, torch.Tensor]], image_sizes): + """ + Rescale the output instances to the target size. + """ + # note: private function; subject to changes + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + instances, batched_inputs, image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + +@META_ARCH_REGISTRY.register() +class ProposalNetwork(nn.Module): + """ + A meta architecture that only predicts object proposals. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + proposal_generator: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + proposal_generator: a module that generates proposals using backbone features + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + """ + super().__init__() + self.backbone = backbone + self.proposal_generator = proposal_generator + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + return { + "backbone": backbone, + "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + Same as in :class:`GeneralizedRCNN.forward` + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "proposals" whose value is a + :class:`Instances` with keys "proposal_boxes" and "objectness_logits". + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + features = self.backbone(images.tensor) + + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + elif "targets" in batched_inputs[0]: + log_first_n( + logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 + ) + gt_instances = [x["targets"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + # In training, the proposals are not useful at all but we generate them anyway. + # This makes RPN-only models about 5% slower. 
+ if self.training: + return proposal_losses + + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + proposals, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"proposals": r}) + return processed_results diff --git a/src/sts/detectron2/modeling/meta_arch/retinanet.py b/src/sts/detectron2/modeling/meta_arch/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..20cff9e3ca581e0f49d87882de803edbf2acb8d0 --- /dev/null +++ b/src/sts/detectron2/modeling/meta_arch/retinanet.py @@ -0,0 +1,609 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +import numpy as np +from typing import Dict, List, Tuple +import torch +from fvcore.nn import sigmoid_focal_loss_jit +from torch import Tensor, nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data.detection_utils import convert_image_to_rgb +from detectron2.layers import ShapeSpec, batched_nms, cat, get_norm, nonzero_tuple +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage + +from ..anchor_generator import build_anchor_generator +from ..backbone import Backbone, build_backbone +from ..box_regression import Box2BoxTransform, _dense_box_regression_loss +from ..matcher import Matcher +from ..postprocessing import detector_postprocess +from .build import META_ARCH_REGISTRY + +__all__ = ["RetinaNet"] + + +logger = logging.getLogger(__name__) + + +def permute_to_N_HWA_K(tensor, K: int): + """ + Transpose/reshape a tensor from (N, (Ai x K), H, W) to (N, (HxWxAi), K) + """ + assert tensor.dim() == 4, tensor.shape + N, _, H, W = tensor.shape + tensor = tensor.view(N, -1, K, H, W) + tensor = tensor.permute(0, 3, 4, 1, 2) + tensor = tensor.reshape(N, -1, K) # Size=(N,HWA,K) + return tensor + + +@META_ARCH_REGISTRY.register() +class RetinaNet(nn.Module): + """ + Implement RetinaNet in :paper:`RetinaNet`. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + head: nn.Module, + head_in_features, + anchor_generator, + box2box_transform, + anchor_matcher, + num_classes, + focal_loss_alpha=0.25, + focal_loss_gamma=2.0, + smooth_l1_beta=0.0, + box_reg_loss_type="smooth_l1", + test_score_thresh=0.05, + test_topk_candidates=1000, + test_nms_thresh=0.5, + max_detections_per_image=100, + pixel_mean, + pixel_std, + vis_period=0, + input_format="BGR", + ): + """ + NOTE: this interface is experimental. + + Args: + backbone: a backbone module, must follow detectron2's backbone interface + head (nn.Module): a module that predicts logits and regression deltas + for each level from a list of per-level features + head_in_features (Tuple[str]): Names of the input feature maps to be used in head + anchor_generator (nn.Module): a module that creates anchors from a + list of features. Usually an instance of :class:`AnchorGenerator` + box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to + instance boxes + anchor_matcher (Matcher): label the anchors by matching them with ground truth. + num_classes (int): number of classes. Used to label background proposals. 
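`permute_to_N_HWA_K` above only rearranges axes so that per-anchor predictions line up with the flattened anchor list; a standalone shape check (re-implemented locally so the snippet runs without this package installed):

```python
import torch

def permute_to_N_HWA_K(tensor, K):
    # (N, A*K, H, W) -> (N, H*W*A, K), mirroring the function defined above
    N, _, H, W = tensor.shape
    return tensor.view(N, -1, K, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, K)

x = torch.randn(2, 9 * 80, 32, 32)        # N=2 images, A=9 anchors, K=80 classes
print(permute_to_N_HWA_K(x, 80).shape)    # torch.Size([2, 9216, 80]) since 32*32*9 = 9216
```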
+ + # Loss parameters: + focal_loss_alpha (float): focal_loss_alpha + focal_loss_gamma (float): focal_loss_gamma + smooth_l1_beta (float): smooth_l1_beta + box_reg_loss_type (str): Options are "smooth_l1", "giou" + + # Inference parameters: + test_score_thresh (float): Inference cls score threshold, only anchors with + score > INFERENCE_TH are considered for inference (to improve speed) + test_topk_candidates (int): Select topk candidates before NMS + test_nms_thresh (float): Overlap threshold used for non-maximum suppression + (suppress boxes with IoU >= this threshold) + max_detections_per_image (int): + Maximum number of detections to return per image during inference + (100 is based on the limit established for the COCO dataset). + + # Input parameters + pixel_mean (Tuple[float]): + Values to be used for image normalization (BGR order). + To train on images of different number of channels, set different mean & std. + Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] + pixel_std (Tuple[float]): + When using pre-trained models in Detectron1 or any MSRA models, + std has been absorbed into its conv1 weights, so the std needs to be set 1. + Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) + vis_period (int): + The period (in terms of steps) for minibatch visualization at train time. + Set to 0 to disable. + input_format (str): Whether the model needs RGB, YUV, HSV etc. + """ + super().__init__() + + self.backbone = backbone + self.head = head + self.head_in_features = head_in_features + if len(self.backbone.output_shape()) != len(self.head_in_features): + logger.warning("[RetinaNet] Backbone produces unused features.") + + # Anchors + self.anchor_generator = anchor_generator + self.box2box_transform = box2box_transform + self.anchor_matcher = anchor_matcher + + self.num_classes = num_classes + # Loss parameters: + self.focal_loss_alpha = focal_loss_alpha + self.focal_loss_gamma = focal_loss_gamma + self.smooth_l1_beta = smooth_l1_beta + self.box_reg_loss_type = box_reg_loss_type + # Inference parameters: + self.test_score_thresh = test_score_thresh + self.test_topk_candidates = test_topk_candidates + self.test_nms_thresh = test_nms_thresh + self.max_detections_per_image = max_detections_per_image + # Vis parameters + self.vis_period = vis_period + self.input_format = input_format + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + """ + In Detectron1, loss is normalized by number of foreground samples in the batch. + When batch size is 1 per GPU, #foreground has a large variance and + using it lead to lower performance. Here we maintain an EMA of #foreground to + stabilize the normalizer. 
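A tiny numeric sketch of the EMA described above, using the momentum value set just below (0.9) and made-up per-iteration foreground counts:

```python
momentum = 0.9
loss_normalizer = 100.0                       # initial guess for #foreground anchors

for num_pos_anchors in [12, 250, 3, 180]:     # hypothetical counts per iteration
    loss_normalizer = momentum * loss_normalizer + (1 - momentum) * max(num_pos_anchors, 1)
    print(round(loss_normalizer, 2))
# The normalizer tracks a smoothed running average instead of jumping with every batch.
```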
+ """ + self.loss_normalizer = 100 # initialize with any reasonable #fg that's not too small + self.loss_normalizer_momentum = 0.9 + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + backbone_shape = backbone.output_shape() + feature_shapes = [backbone_shape[f] for f in cfg.MODEL.RETINANET.IN_FEATURES] + head = RetinaNetHead(cfg, feature_shapes) + anchor_generator = build_anchor_generator(cfg, feature_shapes) + return { + "backbone": backbone, + "head": head, + "anchor_generator": anchor_generator, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS), + "anchor_matcher": Matcher( + cfg.MODEL.RETINANET.IOU_THRESHOLDS, + cfg.MODEL.RETINANET.IOU_LABELS, + allow_low_quality_matches=True, + ), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, + "head_in_features": cfg.MODEL.RETINANET.IN_FEATURES, + # Loss parameters: + "focal_loss_alpha": cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA, + "focal_loss_gamma": cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA, + "smooth_l1_beta": cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA, + "box_reg_loss_type": cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE, + # Inference parameters: + "test_score_thresh": cfg.MODEL.RETINANET.SCORE_THRESH_TEST, + "test_topk_candidates": cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST, + "test_nms_thresh": cfg.MODEL.RETINANET.NMS_THRESH_TEST, + "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE, + # Vis parameters + "vis_period": cfg.VIS_PERIOD, + "input_format": cfg.INPUT.FORMAT, + } + + @property + def device(self): + return self.pixel_mean.device + + def visualize_training(self, batched_inputs, results): + """ + A function used to visualize ground truth images and final network predictions. + It shows ground truth bounding boxes on the original image and up to 20 + predicted object bounding boxes on the original image. + + Args: + batched_inputs (list): a list that contains input to the model. + results (List[Instances]): a list of #images elements. + """ + from detectron2.utils.visualizer import Visualizer + + assert len(batched_inputs) == len( + results + ), "Cannot visualize inputs and results of different sizes" + storage = get_event_storage() + max_boxes = 20 + + image_index = 0 # only visualize a single image + img = batched_inputs[image_index]["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes) + anno_img = v_gt.get_image() + processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1]) + predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy() + + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes]) + prop_img = v_pred.get_image() + vis_img = np.vstack((anno_img, prop_img)) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results" + storage.put_image(vis_name, vis_img) + + def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. 
+ * instances: Instances + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + Returns: + In training, dict[str, Tensor]: mapping from a named loss to a tensor storing the + loss. Used during training only. In inference, the standard output format, described + in :doc:`/tutorials/models`. + """ + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + features = [features[f] for f in self.head_in_features] + + anchors = self.anchor_generator(features) + pred_logits, pred_anchor_deltas = self.head(features) + # Transpose the Hi*Wi*A dimension to the middle: + pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] + pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas] + + if self.training: + assert not torch.jit.is_scripting(), "Not supported" + assert "instances" in batched_inputs[0], "Instance annotations are missing in training!" + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + + gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances) + losses = self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes) + + if self.vis_period > 0: + storage = get_event_storage() + if storage.iter % self.vis_period == 0: + results = self.inference( + anchors, pred_logits, pred_anchor_deltas, images.image_sizes + ) + self.visualize_training(batched_inputs, results) + + return losses + else: + results = self.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes) + if torch.jit.is_scripting(): + return results + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + def losses(self, anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes): + """ + Args: + anchors (list[Boxes]): a list of #feature level Boxes + gt_labels, gt_boxes: see output of :meth:`RetinaNet.label_anchors`. + Their shapes are (N, R) and (N, R, 4), respectively, where R is + the total number of anchors across levels, i.e. sum(Hi x Wi x Ai) + pred_logits, pred_anchor_deltas: both are list[Tensor]. Each element in the + list corresponds to one level and has shape (N, Hi * Wi * Ai, K or 4). + Where K is the number of classes used in `pred_logits`. + + Returns: + dict[str, Tensor]: + mapping from a named loss to a scalar tensor + storing the loss. Used during training only. 
The dict keys are: + "loss_cls" and "loss_box_reg" + """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, R) + + valid_mask = gt_labels >= 0 + pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes) + num_pos_anchors = pos_mask.sum().item() + get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images) + self.loss_normalizer = self.loss_normalizer_momentum * self.loss_normalizer + ( + 1 - self.loss_normalizer_momentum + ) * max(num_pos_anchors, 1) + + # classification and regression loss + gt_labels_target = F.one_hot(gt_labels[valid_mask], num_classes=self.num_classes + 1)[ + :, :-1 + ] # no loss for the last (background) class + loss_cls = sigmoid_focal_loss_jit( + cat(pred_logits, dim=1)[valid_mask], + gt_labels_target.to(pred_logits[0].dtype), + alpha=self.focal_loss_alpha, + gamma=self.focal_loss_gamma, + reduction="sum", + ) + + loss_box_reg = _dense_box_regression_loss( + anchors, + self.box2box_transform, + pred_anchor_deltas, + gt_boxes, + pos_mask, + box_reg_loss_type=self.box_reg_loss_type, + smooth_l1_beta=self.smooth_l1_beta, + ) + + return { + "loss_cls": loss_cls / self.loss_normalizer, + "loss_box_reg": loss_box_reg / self.loss_normalizer, + } + + @torch.no_grad() + def label_anchors(self, anchors, gt_instances): + """ + Args: + anchors (list[Boxes]): A list of #feature level Boxes. + The Boxes contains anchors of this image on the specific feature level. + gt_instances (list[Instances]): a list of N `Instances`s. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. + + Returns: + list[Tensor]: List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across all feature maps (sum(Hi * Wi * A)). + Label values are in {-1, 0, ..., K}, with -1 means ignore, and K means background. + + list[Tensor]: i-th element is a Rx4 tensor, where R is the total number of anchors + across feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as foreground. + """ + anchors = Boxes.cat(anchors) # Rx4 + + gt_labels = [] + matched_gt_boxes = [] + for gt_per_image in gt_instances: + match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors) + matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix) + del match_quality_matrix + + if len(gt_per_image) > 0: + matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs] + + gt_labels_i = gt_per_image.gt_classes[matched_idxs] + # Anchors with label 0 are treated as background. + gt_labels_i[anchor_labels == 0] = self.num_classes + # Anchors with label -1 are ignored. + gt_labels_i[anchor_labels == -1] = -1 + else: + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes + + gt_labels.append(gt_labels_i) + matched_gt_boxes.append(matched_gt_boxes_i) + + return gt_labels, matched_gt_boxes + + def inference( + self, + anchors: List[Boxes], + pred_logits: List[Tensor], + pred_anchor_deltas: List[Tensor], + image_sizes: List[Tuple[int, int]], + ): + """ + Arguments: + anchors (list[Boxes]): A list of #feature level Boxes. + The Boxes contain anchors of this image on the specific feature level. + pred_logits, pred_anchor_deltas: list[Tensor], one per level. Each + has shape (N, Hi * Wi * Ai, K or 4) + image_sizes (List[(h, w)]): the input image sizes + + Returns: + results (List[Instances]): a list of #images elements. 
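The classification target construction in `losses` relies on a small one-hot trick: background anchors carry label `num_classes`, so dropping the last column leaves them with an all-zero target row. A self-contained check with a hypothetical 3-class setup:

```python
import torch
import torch.nn.functional as F

num_classes = 3
gt_labels = torch.tensor([0, 2, 3, 1])            # 3 == background in this toy setup
targets = F.one_hot(gt_labels, num_classes + 1)[:, :-1]
print(targets)
# tensor([[1, 0, 0],
#         [0, 0, 1],
#         [0, 0, 0],   <- background anchor contributes no positive class target
#         [0, 1, 0]])
```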
+ """ + results: List[Instances] = [] + for img_idx, image_size in enumerate(image_sizes): + pred_logits_per_image = [x[img_idx] for x in pred_logits] + deltas_per_image = [x[img_idx] for x in pred_anchor_deltas] + results_per_image = self.inference_single_image( + anchors, pred_logits_per_image, deltas_per_image, image_size + ) + results.append(results_per_image) + return results + + def inference_single_image( + self, + anchors: List[Boxes], + box_cls: List[Tensor], + box_delta: List[Tensor], + image_size: Tuple[int, int], + ): + """ + Single-image inference. Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Arguments: + anchors (list[Boxes]): list of #feature levels. Each entry contains + a Boxes object, which contains all the anchors in that feature level. + box_cls (list[Tensor]): list of #feature levels. Each entry contains + tensor of size (H x W x A, K) + box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. + image_size (tuple(H, W)): a tuple of the image height and width. + + Returns: + Same as `inference`, but for only one image. + """ + boxes_all = [] + scores_all = [] + class_idxs_all = [] + + # Iterate over every feature level + for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors): + # (HxWxAxK,) + predicted_prob = box_cls_i.flatten().sigmoid_() + + # Apply two filtering below to make NMS faster. + # 1. Keep boxes with confidence score higher than threshold + keep_idxs = predicted_prob > self.test_score_thresh + predicted_prob = predicted_prob[keep_idxs] + topk_idxs = nonzero_tuple(keep_idxs)[0] + + # 2. Keep top k top scoring boxes only + num_topk = min(self.test_topk_candidates, topk_idxs.size(0)) + # torch.sort is actually faster than .topk (at least on GPUs) + predicted_prob, idxs = predicted_prob.sort(descending=True) + predicted_prob = predicted_prob[:num_topk] + topk_idxs = topk_idxs[idxs[:num_topk]] + + anchor_idxs = topk_idxs // self.num_classes + classes_idxs = topk_idxs % self.num_classes + + box_reg_i = box_reg_i[anchor_idxs] + anchors_i = anchors_i[anchor_idxs] + # predict boxes + predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor) + + boxes_all.append(predicted_boxes) + scores_all.append(predicted_prob) + class_idxs_all.append(classes_idxs) + + boxes_all, scores_all, class_idxs_all = [ + cat(x) for x in [boxes_all, scores_all, class_idxs_all] + ] + keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.test_nms_thresh) + keep = keep[: self.max_detections_per_image] + + result = Instances(image_size) + result.pred_boxes = Boxes(boxes_all[keep]) + result.scores = scores_all[keep] + result.pred_classes = class_idxs_all[keep] + return result + + def preprocess_image(self, batched_inputs: Tuple[Dict[str, Tensor]]): + """ + Normalize, pad and batch the input images. + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + return images + + +class RetinaNetHead(nn.Module): + """ + The head used in RetinaNet for object classification and box regression. + It has two subnets for the two tasks, with a common structure but separate parameters. + """ + + @configurable + def __init__( + self, + *, + input_shape: List[ShapeSpec], + num_classes, + num_anchors, + conv_dims: List[int], + norm="", + prior_prob=0.01, + ): + """ + NOTE: this interface is experimental. 
+ + Args: + input_shape (List[ShapeSpec]): input shape + num_classes (int): number of classes. Used to label background proposals. + num_anchors (int): number of generated anchors + conv_dims (List[int]): dimensions for each convolution layer + norm (str or callable): + Normalization for conv layers except for the two output layers. + See :func:`detectron2.layers.get_norm` for supported types. + prior_prob (float): Prior weight for computing bias + """ + super().__init__() + + if norm == "BN" or norm == "SyncBN": + logger.warning("Shared norm does not work well for BN, SyncBN, expect poor results") + + cls_subnet = [] + bbox_subnet = [] + for in_channels, out_channels in zip( + [input_shape[0].channels] + list(conv_dims), conv_dims + ): + cls_subnet.append( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + ) + if norm: + cls_subnet.append(get_norm(norm, out_channels)) + cls_subnet.append(nn.ReLU()) + bbox_subnet.append( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + ) + if norm: + bbox_subnet.append(get_norm(norm, out_channels)) + bbox_subnet.append(nn.ReLU()) + + self.cls_subnet = nn.Sequential(*cls_subnet) + self.bbox_subnet = nn.Sequential(*bbox_subnet) + self.cls_score = nn.Conv2d( + conv_dims[-1], num_anchors * num_classes, kernel_size=3, stride=1, padding=1 + ) + self.bbox_pred = nn.Conv2d( + conv_dims[-1], num_anchors * 4, kernel_size=3, stride=1, padding=1 + ) + + # Initialization + for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]: + for layer in modules.modules(): + if isinstance(layer, nn.Conv2d): + torch.nn.init.normal_(layer.weight, mean=0, std=0.01) + torch.nn.init.constant_(layer.bias, 0) + + # Use prior in model initialization to improve stability + bias_value = -(math.log((1 - prior_prob) / prior_prob)) + torch.nn.init.constant_(self.cls_score.bias, bias_value) + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors + assert ( + len(set(num_anchors)) == 1 + ), "Using different number of anchors between levels is not currently supported!" + num_anchors = num_anchors[0] + + return { + "input_shape": input_shape, + "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, + "conv_dims": [input_shape[0].channels] * cfg.MODEL.RETINANET.NUM_CONVS, + "prior_prob": cfg.MODEL.RETINANET.PRIOR_PROB, + "norm": cfg.MODEL.RETINANET.NORM, + "num_anchors": num_anchors, + } + + def forward(self, features: List[Tensor]): + """ + Arguments: + features (list[Tensor]): FPN feature map tensors in high to low resolution. + Each tensor in the list correspond to different feature levels. + + Returns: + logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). + The tensor predicts the classification probability + at each spatial position for each of the A anchors and K object + classes. + bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). + The tensor predicts 4-vector (dx,dy,dw,dh) box + regression values for every anchor. These values are the + relative offset between the anchor and the ground truth box. 
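The `prior_prob` bias initialization above sets the classification logits so that every anchor starts out predicting foreground with probability of roughly `prior_prob`; a quick numeric check of the formula:

```python
import math

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
print(bias_value)                        # ~ -4.595
print(1 / (1 + math.exp(-bias_value)))   # ~ 0.01: the initial foreground probability
```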
+ """ + logits = [] + bbox_reg = [] + for feature in features: + logits.append(self.cls_score(self.cls_subnet(feature))) + bbox_reg.append(self.bbox_pred(self.bbox_subnet(feature))) + return logits, bbox_reg diff --git a/src/sts/detectron2/modeling/meta_arch/semantic_seg.py b/src/sts/detectron2/modeling/meta_arch/semantic_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..a7ed07e3659d1fb7ffd143c30c4c3ac2e7fa523e --- /dev/null +++ b/src/sts/detectron2/modeling/meta_arch/semantic_seg.py @@ -0,0 +1,250 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Callable, Dict, Optional, Tuple, Union +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, get_norm +from detectron2.structures import ImageList +from detectron2.utils.registry import Registry + +from ..backbone import Backbone, build_backbone +from ..postprocessing import sem_seg_postprocess +from .build import META_ARCH_REGISTRY + +__all__ = ["SemanticSegmentor", "SEM_SEG_HEADS_REGISTRY", "SemSegFPNHead", "build_sem_seg_head"] + + +SEM_SEG_HEADS_REGISTRY = Registry("SEM_SEG_HEADS") +SEM_SEG_HEADS_REGISTRY.__doc__ = """ +Registry for semantic segmentation heads, which make semantic segmentation predictions +from feature maps. +""" + + +@META_ARCH_REGISTRY.register() +class SemanticSegmentor(nn.Module): + """ + Main class for semantic segmentation architectures. + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + sem_seg_head: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float] + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + sem_seg_head: a module that predicts semantic segmentation from backbone features + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + """ + super().__init__() + self.backbone = backbone + self.sem_seg_head = sem_seg_head + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + sem_seg_head = build_sem_seg_head(cfg, backbone.output_shape()) + return { + "backbone": backbone, + "sem_seg_head": sem_seg_head, + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + } + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "sem_seg": semantic segmentation ground truth + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model (may be different + from input resolution), used in inference. + + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "sem_seg" whose value is a + Tensor that represents the + per-pixel segmentation prediced by the head. + The prediction has shape KxHxW that represents the logits of + each class for each pixel. 
+ """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + + features = self.backbone(images.tensor) + + if "sem_seg" in batched_inputs[0]: + targets = [x["sem_seg"].to(self.device) for x in batched_inputs] + targets = ImageList.from_tensors( + targets, self.backbone.size_divisibility, self.sem_seg_head.ignore_value + ).tensor + else: + targets = None + results, losses = self.sem_seg_head(features, targets) + + if self.training: + return losses + + processed_results = [] + for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): + height = input_per_image.get("height") + width = input_per_image.get("width") + r = sem_seg_postprocess(result, image_size, height, width) + processed_results.append({"sem_seg": r}) + return processed_results + + +def build_sem_seg_head(cfg, input_shape): + """ + Build a semantic segmentation head from `cfg.MODEL.SEM_SEG_HEAD.NAME`. + """ + name = cfg.MODEL.SEM_SEG_HEAD.NAME + return SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +@SEM_SEG_HEADS_REGISTRY.register() +class SemSegFPNHead(nn.Module): + """ + A semantic segmentation head described in :paper:`PanopticFPN`. + It takes a list of FPN features as input, and applies a sequence of + 3x3 convs and upsampling to scale all of them to the stride defined by + ``common_stride``. Then these features are added and used to make final + predictions by another 1x1 conv layer. + """ + + @configurable + def __init__( + self, + input_shape: Dict[str, ShapeSpec], + *, + num_classes: int, + conv_dims: int, + common_stride: int, + loss_weight: float = 1.0, + norm: Optional[Union[str, Callable]] = None, + ignore_value: int = -1 + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape: shapes (channels and stride) of the input features + num_classes: number of classes to predict + conv_dims: number of output channels for the intermediate conv layers. + common_stride: the common stride that all features will be upscaled to + loss_weight: loss weight + norm (str or callable): normalization for all conv layers + ignore_value: category id to be ignored during training. 
+ """ + super().__init__() + input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) + self.in_features = [k for k, v in input_shape] + feature_strides = [v.stride for k, v in input_shape] + feature_channels = [v.channels for k, v in input_shape] + + self.ignore_value = ignore_value + self.common_stride = common_stride + self.loss_weight = loss_weight + + self.scale_heads = [] + for in_feature, stride, channels in zip( + self.in_features, feature_strides, feature_channels + ): + head_ops = [] + head_length = max(1, int(np.log2(stride) - np.log2(self.common_stride))) + for k in range(head_length): + norm_module = get_norm(norm, conv_dims) + conv = Conv2d( + channels if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=1, + padding=1, + bias=not norm, + norm=norm_module, + activation=F.relu, + ) + weight_init.c2_msra_fill(conv) + head_ops.append(conv) + if stride != self.common_stride: + head_ops.append( + nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) + ) + self.scale_heads.append(nn.Sequential(*head_ops)) + self.add_module(in_feature, self.scale_heads[-1]) + self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) + weight_init.c2_msra_fill(self.predictor) + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + return { + "input_shape": { + k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + }, + "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + "conv_dims": cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM, + "common_stride": cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE, + "norm": cfg.MODEL.SEM_SEG_HEAD.NORM, + "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, + } + + def forward(self, features, targets=None): + """ + Returns: + In training, returns (None, dict of losses) + In inference, returns (CxHxW logits, {}) + """ + x = self.layers(features) + if self.training: + return None, self.losses(x, targets) + else: + x = F.interpolate( + x, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + return x, {} + + def layers(self, features): + for i, f in enumerate(self.in_features): + if i == 0: + x = self.scale_heads[i](features[f]) + else: + x = x + self.scale_heads[i](features[f]) + x = self.predictor(x) + return x + + def losses(self, predictions, targets): + predictions = predictions.float() # https://github.com/pytorch/pytorch/issues/48163 + predictions = F.interpolate( + predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + loss = F.cross_entropy( + predictions, targets, reduction="mean", ignore_index=self.ignore_value + ) + losses = {"loss_sem_seg": loss * self.loss_weight} + return losses diff --git a/src/sts/detectron2/modeling/mmdet_wrapper.py b/src/sts/detectron2/modeling/mmdet_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..7905fbcba481ebd176c7c4f3c323fd982706e7c1 --- /dev/null +++ b/src/sts/detectron2/modeling/mmdet_wrapper.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- + +import itertools +import logging +import numpy as np +from collections import OrderedDict +from collections.abc import Mapping +from typing import Dict, List, Optional, Tuple, Union +import torch +from omegaconf import DictConfig, OmegaConf +from torch import Tensor, nn + +from detectron2.layers import ShapeSpec +from detectron2.structures import BitMasks, Boxes, ImageList, Instances +from detectron2.utils.events import get_event_storage + +from .backbone import 
Backbone + +logger = logging.getLogger(__name__) + + +def _to_container(cfg): + """ + mmdet will assert the type of dict/list. + So convert omegaconf objects to dict/list. + """ + if isinstance(cfg, DictConfig): + cfg = OmegaConf.to_container(cfg, resolve=True) + from mmcv.utils import ConfigDict + + return ConfigDict(cfg) + + +class MMDetBackbone(Backbone): + """ + Wrapper of mmdetection backbones to use in detectron2. + + mmdet backbones produce list/tuple of tensors, while detectron2 backbones + produce a dict of tensors. This class wraps the given backbone to produce + output in detectron2's convention, so it can be used in place of detectron2 + backbones. + """ + + def __init__( + self, + backbone: Union[nn.Module, Mapping], + neck: Union[nn.Module, Mapping, None] = None, + *, + pretrained_backbone: Optional[str] = None, + output_shapes: List[ShapeSpec], + output_names: Optional[List[str]] = None, + ): + """ + Args: + backbone: either a backbone module or a mmdet config dict that defines a + backbone. The backbone takes a 4D image tensor and returns a + sequence of tensors. + neck: either a backbone module or a mmdet config dict that defines a + neck. The neck takes outputs of backbone and returns a + sequence of tensors. If None, no neck is used. + pretrained_backbone: defines the backbone weights that can be loaded by + mmdet, such as "torchvision://resnet50". + output_shapes: shape for every output of the backbone (or neck, if given). + stride and channels are often needed. + output_names: names for every output of the backbone (or neck, if given). + By default, will use "out0", "out1", ... + """ + super().__init__() + if isinstance(backbone, Mapping): + from mmdet.models import build_backbone + + backbone = build_backbone(_to_container(backbone)) + self.backbone = backbone + + if isinstance(neck, Mapping): + from mmdet.models import build_neck + + neck = build_neck(_to_container(neck)) + self.neck = neck + + # It's confusing that backbone weights are given as a separate argument, + # but "neck" weights, if any, are part of neck itself. This is the interface + # of mmdet so we follow it. Reference: + # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py + logger.info(f"Initializing mmdet backbone weights: {pretrained_backbone} ...") + self.backbone.init_weights(pretrained_backbone) + # train() in mmdet modules is non-trivial, and has to be explicitly + # called. Reference: + # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/backbones/resnet.py + self.backbone.train() + if self.neck is not None: + logger.info("Initializing mmdet neck weights ...") + if isinstance(self.neck, nn.Sequential): + for m in self.neck: + m.init_weights() + else: + self.neck.init_weights() + self.neck.train() + + self._output_shapes = output_shapes + if not output_names: + output_names = [f"out{i}" for i in range(len(output_shapes))] + self._output_names = output_names + + def forward(self, x) -> Dict[str, Tensor]: + outs = self.backbone(x) + if self.neck is not None: + outs = self.neck(outs) + assert isinstance( + outs, (list, tuple) + ), "mmdet backbone should return a list/tuple of tensors!" 
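`_to_container` above exists because mmdet type-checks its config objects; the omegaconf half of that conversion can be sketched without pulling in mmcv (the `ConfigDict` wrapping is omitted here and the config values are made up):

```python
from omegaconf import OmegaConf

cfg = OmegaConf.create({"type": "ResNet", "depth": 50, "out_indices": [0, 1, 2, 3]})
plain = OmegaConf.to_container(cfg, resolve=True)   # plain dict/list, as mmdet expects
print(type(plain), plain)
```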
+ if len(outs) != len(self._output_shapes): + raise ValueError( + "Length of output_shapes does not match outputs from the mmdet backbone: " + f"{len(outs)} != {len(self._output_shapes)}" + ) + return {k: v for k, v in zip(self._output_names, outs)} + + def output_shape(self) -> Dict[str, ShapeSpec]: + return {k: v for k, v in zip(self._output_names, self._output_shapes)} + + +class MMDetDetector(nn.Module): + """ + Wrapper of a mmdetection detector model, for detection and instance segmentation. + Input/output formats of this class follow detectron2's convention, so a + mmdetection model can be trained and evaluated in detectron2. + """ + + def __init__( + self, + detector: Union[nn.Module, Mapping], + *, + # Default is 32 regardless of model: + # https://github.com/open-mmlab/mmdetection/tree/master/configs/_base_/datasets + size_divisibility=32, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + ): + """ + Args: + detector: a mmdet detector, or a mmdet config dict that defines a detector. + size_divisibility: pad input images to multiple of this number + pixel_mean: per-channel mean to normalize input image + pixel_std: per-channel stddev to normalize input image + """ + super().__init__() + if isinstance(detector, Mapping): + from mmdet.models import build_detector + + detector = build_detector(_to_container(detector)) + self.detector = detector + self.size_divisibility = size_divisibility + + self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) + assert ( + self.pixel_mean.shape == self.pixel_std.shape + ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" + + def forward(self, batched_inputs: Tuple[Dict[str, torch.Tensor]]): + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, size_divisibility=self.size_divisibility).tensor + metas = [] + rescale = {"height" in x for x in batched_inputs} + if len(rescale) != 1: + raise ValueError("Some inputs have original height/width, but some don't!") + rescale = list(rescale)[0] + output_shapes = [] + for input in batched_inputs: + meta = {} + c, h, w = input["image"].shape + meta["img_shape"] = meta["ori_shape"] = (h, w, c) + if rescale: + scale_factor = np.sqrt(h * w / (input["height"] * input["width"])) + ori_shape = (input["height"], input["width"]) + output_shapes.append(ori_shape) + meta["ori_shape"] = ori_shape + (c,) + else: + scale_factor = 1.0 + output_shapes.append((h, w)) + meta["scale_factor"] = scale_factor + meta["flip"] = False + padh, padw = images.shape[-2:] + meta["pad_shape"] = (padh, padw, c) + metas.append(meta) + + if self.training: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + if gt_instances[0].has("gt_masks"): + from mmdet.core import PolygonMasks as mm_PolygonMasks, BitmapMasks as mm_BitMasks + + def convert_mask(m, shape): + # mmdet mask format + if isinstance(m, BitMasks): + return mm_BitMasks(m.tensor.cpu().numpy(), shape[0], shape[1]) + else: + return mm_PolygonMasks(m.polygons, shape[0], shape[1]) + + gt_masks = [convert_mask(x.gt_masks, x.image_size) for x in gt_instances] + else: + gt_masks = None + losses_and_metrics = self.detector.forward_train( + images, + metas, + [x.gt_boxes.tensor for x in gt_instances], + [x.gt_classes for x in gt_instances], + gt_masks=gt_masks, + ) + return _parse_losses(losses_and_metrics) + else: + 
results = self.detector.simple_test(images, metas, rescale=rescale) + results = [ + {"instances": _convert_mmdet_result(r, shape)} + for r, shape in zip(results, output_shapes) + ] + return results + + @property + def device(self): + return self.pixel_mean.device + + +# Reference: show_result() in +# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py +def _convert_mmdet_result(result, shape: Tuple[int, int]) -> Instances: + if isinstance(result, tuple): + bbox_result, segm_result = result + if isinstance(segm_result, tuple): + segm_result = segm_result[0] + else: + bbox_result, segm_result = result, None + + bboxes = torch.from_numpy(np.vstack(bbox_result)) # Nx5 + bboxes, scores = bboxes[:, :4], bboxes[:, -1] + labels = [ + torch.full((bbox.shape[0],), i, dtype=torch.int32) for i, bbox in enumerate(bbox_result) + ] + labels = torch.cat(labels) + inst = Instances(shape) + inst.pred_boxes = Boxes(bboxes) + inst.scores = scores + inst.pred_classes = labels + + if segm_result is not None and len(labels) > 0: + segm_result = list(itertools.chain(*segm_result)) + segm_result = [torch.from_numpy(x) if isinstance(x, np.ndarray) else x for x in segm_result] + segm_result = torch.stack(segm_result, dim=0) + inst.pred_masks = segm_result + return inst + + +# reference: https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py +def _parse_losses(losses: Dict[str, Tensor]) -> Dict[str, Tensor]: + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError(f"{loss_name} is not a tensor or list of tensors") + + if "loss" not in loss_name: + # put metrics to storage; don't return them + storage = get_event_storage() + value = log_vars.pop(loss_name).cpu().item() + storage.put_scalar(loss_name, value) + return log_vars diff --git a/src/sts/detectron2/modeling/poolers.py b/src/sts/detectron2/modeling/poolers.py new file mode 100644 index 0000000000000000000000000000000000000000..e7ef14873abe48d3bcf9f3059149bc228fcf1d28 --- /dev/null +++ b/src/sts/detectron2/modeling/poolers.py @@ -0,0 +1,258 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List +import torch +from torch import nn +from torchvision.ops import RoIPool + +from detectron2.layers import ROIAlign, ROIAlignRotated, cat, nonzero_tuple +from detectron2.structures import Boxes + +""" +To export ROIPooler to torchscript, in this file, variables that should be annotated with +`Union[List[Boxes], List[RotatedBoxes]]` are only annotated with `List[Boxes]`. + +TODO: Correct these annotations when torchscript support `Union`. +https://github.com/pytorch/pytorch/issues/41412 +""" + +__all__ = ["ROIPooler"] + + +def assign_boxes_to_levels( + box_lists: List[Boxes], + min_level: int, + max_level: int, + canonical_box_size: int, + canonical_level: int, +): + """ + Map each box in `box_lists` to a feature map level index and return the assignment + vector. + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, + where N is the number of images in the batch. + min_level (int): Smallest feature map level index. The input is considered index 0, + the output of stage 1 is index 1, and so. + max_level (int): Largest feature map level index. 
+ canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). + canonical_level (int): The feature map level index on which a canonically-sized box + should be placed. + + Returns: + A tensor of length M, where M is the total number of boxes aggregated over all + N batch images. The memory layout corresponds to the concatenation of boxes + from all images. Each element is the feature map index, as an offset from + `self.min_level`, for the corresponding box (so value i means the box is at + `self.min_level + i`). + """ + box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) + # Eqn.(1) in FPN paper + level_assignments = torch.floor( + canonical_level + torch.log2(box_sizes / canonical_box_size + 1e-8) + ) + # clamp level to (min, max), in case the box size is too large or too small + # for the available feature maps + level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) + return level_assignments.to(torch.int64) - min_level + + +def _fmt_box_list(box_tensor, batch_index: int): + repeated_index = torch.full_like( + box_tensor[:, :1], batch_index, dtype=box_tensor.dtype, device=box_tensor.device + ) + return cat((repeated_index, box_tensor), dim=1) + + +def convert_boxes_to_pooler_format(box_lists: List[Boxes]): + """ + Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops + (see description under Returns). + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + + Returns: + When input is list[Boxes]: + A tensor of shape (M, 5), where M is the total number of boxes aggregated over all + N batch images. + The 5 columns are (batch index, x0, y0, x1, y1), where batch index + is the index in [0, N) identifying which batch image the box with corners at + (x0, y0, x1, y1) comes from. + When input is list[RotatedBoxes]: + A tensor of shape (M, 6), where M is the total number of boxes aggregated over all + N batch images. + The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), + where batch index is the index in [0, N) identifying which batch image the + rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. + """ + pooler_fmt_boxes = cat( + [_fmt_box_list(box_list.tensor, i) for i, box_list in enumerate(box_lists)], dim=0 + ) + + return pooler_fmt_boxes + + +class ROIPooler(nn.Module): + """ + Region of interest feature map pooler that supports pooling from one or more + feature maps. + """ + + def __init__( + self, + output_size, + scales, + sampling_ratio, + pooler_type, + canonical_box_size=224, + canonical_level=4, + ): + """ + Args: + output_size (int, tuple[int] or list[int]): output size of the pooled region, + e.g., 14 x 14. If tuple or list is given, the length must be 2. + scales (list[float]): The scale for each low-level pooling op relative to + the input image. For a feature map with stride s relative to the input + image, scale is defined as 1/s. The stride must be power of 2. + When there are multiple scales, they must form a pyramid, i.e. they must be + a monotically decreasing geometric sequence with a factor of 1/2. + sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. + pooler_type (string): Name of the type of pooling operation that should be applied. + For instance, "ROIPool" or "ROIAlignV2". + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). 
The default + is heuristically defined as 224 pixels in the FPN paper (based on ImageNet + pre-training). + canonical_level (int): The feature map level index from which a canonically-sized box + should be placed. The default is defined as level 4 (stride=16) in the FPN paper, + i.e., a box of size 224x224 will be placed on the feature with stride=16. + The box placement for all boxes will be determined from their sizes w.r.t + canonical_box_size. For example, a box whose area is 4x that of a canonical box + should be used to pool features from feature level ``canonical_level+1``. + + Note that the actual input feature maps given to this module may not have + sufficiently many levels for the input boxes. If the boxes are too large or too + small for the input feature maps, the closest level will be used. + """ + super().__init__() + + if isinstance(output_size, int): + output_size = (output_size, output_size) + assert len(output_size) == 2 + assert isinstance(output_size[0], int) and isinstance(output_size[1], int) + self.output_size = output_size + + if pooler_type == "ROIAlign": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False + ) + for scale in scales + ) + elif pooler_type == "ROIAlignV2": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True + ) + for scale in scales + ) + elif pooler_type == "ROIPool": + self.level_poolers = nn.ModuleList( + RoIPool(output_size, spatial_scale=scale) for scale in scales + ) + elif pooler_type == "ROIAlignRotated": + self.level_poolers = nn.ModuleList( + ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) + for scale in scales + ) + else: + raise ValueError("Unknown pooler type: {}".format(pooler_type)) + + # Map scale (defined as 1 / stride) to its feature map level under the + # assumption that stride is a power of 2. + min_level = -(math.log2(scales[0])) + max_level = -(math.log2(scales[-1])) + assert math.isclose(min_level, int(min_level)) and math.isclose( + max_level, int(max_level) + ), "Featuremap stride is not power of 2!" + self.min_level = int(min_level) + self.max_level = int(max_level) + assert ( + len(scales) == self.max_level - self.min_level + 1 + ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" + assert 0 <= self.min_level and self.min_level <= self.max_level + self.canonical_level = canonical_level + assert canonical_box_size > 0 + self.canonical_box_size = canonical_box_size + + def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]): + """ + Args: + x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those + used to construct this module. + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + The box coordinates are defined on the original image and + will be scaled by the `scales` argument of :class:`ROIPooler`. + + Returns: + Tensor: + A tensor of shape (M, C, output_size, output_size) where M is the total number of + boxes aggregated over all N batch images and C is the number of channels in `x`. 
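
        Example (an illustrative sketch; the scales, feature shapes and boxes below
        are assumptions, not values taken from any particular config)::

            pooler = ROIPooler(
                output_size=7,
                scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
                sampling_ratio=0,
                pooler_type="ROIAlignV2",
            )
            # Two images, four pyramid levels, 256 channels each.
            feats = [torch.rand(2, 256, 128 // 2 ** i, 128 // 2 ** i) for i in range(4)]
            # One box in the first image, none in the second.
            boxes = [Boxes(torch.tensor([[8.0, 8.0, 72.0, 72.0]])), Boxes(torch.zeros(0, 4))]
            pooled = pooler(feats, boxes)  # shape (1, 256, 7, 7)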
+ """ + num_level_assignments = len(self.level_poolers) + + assert isinstance(x, list) and isinstance( + box_lists, list + ), "Arguments to pooler must be lists" + assert ( + len(x) == num_level_assignments + ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( + num_level_assignments, len(x) + ) + + assert len(box_lists) == x[0].size( + 0 + ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( + x[0].size(0), len(box_lists) + ) + if len(box_lists) == 0: + return torch.zeros( + (0, x[0].shape[1]) + self.output_size, device=x[0].device, dtype=x[0].dtype + ) + + pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) + + if num_level_assignments == 1: + return self.level_poolers[0](x[0], pooler_fmt_boxes) + + level_assignments = assign_boxes_to_levels( + box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level + ) + + num_boxes = pooler_fmt_boxes.size(0) + num_channels = x[0].shape[1] + if len(self.output_size) == 1: + output_size = self.output_size[0] + + dtype, device = x[0].dtype, x[0].device + output = torch.zeros( + (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device + ) + else: + output_size = self.output_size[0] + output_size1 = self.output_size[1] + dtype, device = x[0].dtype, x[0].device + output = torch.zeros( + (num_boxes, num_channels, output_size, output_size1), dtype=dtype, device=device + ) + + for level, pooler in enumerate(self.level_poolers): + inds = nonzero_tuple(level_assignments == level)[0] + pooler_fmt_boxes_level = pooler_fmt_boxes[inds] + # Use index_put_ instead of advance indexing, to avoid pytorch/issues/49852 + output.index_put_((inds,), pooler(x[level], pooler_fmt_boxes_level)) + + return output diff --git a/src/sts/detectron2/modeling/postprocessing.py b/src/sts/detectron2/modeling/postprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..f42e77c52f15869dcfb426d12befa8837f404021 --- /dev/null +++ b/src/sts/detectron2/modeling/postprocessing.py @@ -0,0 +1,101 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch +from torch.nn import functional as F + +from detectron2.layers import paste_masks_in_image +from detectron2.structures import Instances +from detectron2.utils.memory import retry_if_cuda_oom + + +# perhaps should rename to "resize_instance" +def detector_postprocess( + results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5 +): + """ + Resize the output instances. + The input images are often resized when entering an object detector. + As a result, we often need the outputs of the detector in a different + resolution from its inputs. + + This function will resize the raw outputs of an R-CNN detector + to produce outputs according to the desired output resolution. + + Args: + results (Instances): the raw outputs from the detector. + `results.image_size` contains the input image resolution the detector sees. + This object might be modified in-place. + output_height, output_width: the desired output resolution. + + Returns: + Instances: the resized output from the model, based on the output resolution + """ + # Change to 'if is_tracing' after PT1.7 + if isinstance(output_height, torch.Tensor): + # Converts integer tensors to float temporaries to ensure true + # division is performed when computing scale_x and scale_y. 
+ output_width_tmp = output_width.float() + output_height_tmp = output_height.float() + new_size = torch.stack([output_height, output_width]) + else: + new_size = (output_height, output_width) + output_width_tmp = output_width + output_height_tmp = output_height + + scale_x, scale_y = ( + output_width_tmp / results.image_size[1], + output_height_tmp / results.image_size[0], + ) + results = Instances(new_size, **results.get_fields()) + + if results.has("pred_boxes"): + output_boxes = results.pred_boxes + elif results.has("proposal_boxes"): + output_boxes = results.proposal_boxes + else: + output_boxes = None + assert output_boxes is not None, "Predictions must contain boxes!" + + output_boxes.scale(scale_x, scale_y) + output_boxes.clip(results.image_size) + + results = results[output_boxes.nonempty()] + + if results.has("pred_masks"): + results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( + results.pred_masks[:, 0, :, :], # N, 1, M, M + results.pred_boxes, + results.image_size, + threshold=mask_threshold, + ) + + if results.has("pred_keypoints"): + results.pred_keypoints[:, :, 0] *= scale_x + results.pred_keypoints[:, :, 1] *= scale_y + + return results + + +def sem_seg_postprocess(result, img_size, output_height, output_width): + """ + Return semantic segmentation predictions in the original resolution. + + The input images are often resized when entering semantic segmentor. Moreover, in same + cases, they also padded inside segmentor to be divisible by maximum network stride. + As a result, we often need the predictions of the segmentor in a different + resolution from its inputs. + + Args: + result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), + where C is the number of classes, and H, W are the height and width of the prediction. + img_size (tuple): image size that segmentor is taking as input. + output_height, output_width: the desired output resolution. + + Returns: + semantic segmentation prediction (Tensor): A tensor of the shape + (C, output_height, output_width) that contains per-pixel soft predictions. + """ + result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) + result = F.interpolate( + result, size=(output_height, output_width), mode="bilinear", align_corners=False + )[0] + return result diff --git a/src/sts/detectron2/modeling/proposal_generator/__init__.py b/src/sts/detectron2/modeling/proposal_generator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..79c9acf84fd69e527deb89f1a54375fd71552ed8 --- /dev/null +++ b/src/sts/detectron2/modeling/proposal_generator/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
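# This package provides the proposal generator registry and builder (build.py)
# together with the axis-aligned RPN and the rotated RRPN implementations.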
+from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator +from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN + +__all__ = list(globals().keys()) diff --git a/src/sts/detectron2/modeling/proposal_generator/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/modeling/proposal_generator/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5832aa3b09637c41d8ee82d057ff38804d624f6b Binary files /dev/null and b/src/sts/detectron2/modeling/proposal_generator/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/proposal_generator/__pycache__/build.cpython-38.pyc b/src/sts/detectron2/modeling/proposal_generator/__pycache__/build.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b028bc5fbeed0f2928c6768acee28d9bf700f630 Binary files /dev/null and b/src/sts/detectron2/modeling/proposal_generator/__pycache__/build.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/proposal_generator/__pycache__/proposal_utils.cpython-38.pyc b/src/sts/detectron2/modeling/proposal_generator/__pycache__/proposal_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0363be406a819127c6548c646454a74802720825 Binary files /dev/null and b/src/sts/detectron2/modeling/proposal_generator/__pycache__/proposal_utils.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/proposal_generator/__pycache__/rpn.cpython-38.pyc b/src/sts/detectron2/modeling/proposal_generator/__pycache__/rpn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58f361de3b90be5850f8a06b38ace06af6b96e09 Binary files /dev/null and b/src/sts/detectron2/modeling/proposal_generator/__pycache__/rpn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/proposal_generator/__pycache__/rrpn.cpython-38.pyc b/src/sts/detectron2/modeling/proposal_generator/__pycache__/rrpn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c13f408234bb469c0eddf05848e49fbe6e56fcfe Binary files /dev/null and b/src/sts/detectron2/modeling/proposal_generator/__pycache__/rrpn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/proposal_generator/build.py b/src/sts/detectron2/modeling/proposal_generator/build.py new file mode 100644 index 0000000000000000000000000000000000000000..34eb12d00d94ff905b796e75e2c4c5845257c8e9 --- /dev/null +++ b/src/sts/detectron2/modeling/proposal_generator/build.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from detectron2.utils.registry import Registry + +PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") +PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ +Registry for proposal generator, which produces object proposals from feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + +from . import rpn, rrpn # noqa F401 isort:skip + + +def build_proposal_generator(cfg, input_shape): + """ + Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. + The name can be "PrecomputedProposals" to use no proposal generator. 
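
    Example (illustrative; assumes a populated detectron2 config ``cfg`` and a
    backbone whose ``output_shape()`` describes its output feature maps)::

        proposal_generator = build_proposal_generator(cfg, backbone.output_shape())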
+ """ + name = cfg.MODEL.PROPOSAL_GENERATOR.NAME + if name == "PrecomputedProposals": + return None + + return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) diff --git a/src/sts/detectron2/modeling/proposal_generator/proposal_utils.py b/src/sts/detectron2/modeling/proposal_generator/proposal_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9c104367417d23756a6072de48891009e6766775 --- /dev/null +++ b/src/sts/detectron2/modeling/proposal_generator/proposal_utils.py @@ -0,0 +1,182 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +from typing import List, Tuple +import torch + +from detectron2.layers import batched_nms, cat +from detectron2.structures import Boxes, Instances +from detectron2.utils.env import TORCH_VERSION + +logger = logging.getLogger(__name__) + + +def _is_tracing(): + if torch.jit.is_scripting(): + # https://github.com/pytorch/pytorch/issues/47379 + return False + else: + return TORCH_VERSION >= (1, 7) and torch.jit.is_tracing() + + +def find_top_rpn_proposals( + proposals: List[torch.Tensor], + pred_objectness_logits: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + nms_thresh: float, + pre_nms_topk: int, + post_nms_topk: int, + min_box_size: float, + training: bool, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps for each image. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). + image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is per + feature map. + post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. + min_box_size (float): minimum proposal box side length in pixels (absolute units + wrt input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + list[Instances]: list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. + """ + num_images = len(image_sizes) + device = proposals[0].device + + # 1. 
Select top-k anchor for every level and every image + topk_scores = [] # #lvl Tensor, each of shape N x topk + topk_proposals = [] + level_ids = [] # #lvl Tensor, each of shape (topk,) + batch_idx = torch.arange(num_images, device=device) + for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)): + Hi_Wi_A = logits_i.shape[1] + if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing + num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk) + else: + num_proposals_i = min(Hi_Wi_A, pre_nms_topk) + + # sort is faster than topk: https://github.com/pytorch/pytorch/issues/22812 + # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) + logits_i, idx = logits_i.sort(descending=True, dim=1) + topk_scores_i = logits_i.narrow(1, 0, num_proposals_i) + topk_idx = idx.narrow(1, 0, num_proposals_i) + + # each is N x topk + topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4 + + topk_proposals.append(topk_proposals_i) + topk_scores.append(topk_scores_i) + level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) + + # 2. Concat all levels together + topk_scores = cat(topk_scores, dim=1) + topk_proposals = cat(topk_proposals, dim=1) + level_ids = cat(level_ids, dim=0) + + # 3. For each image, run a per-level NMS, and choose topk results. + results: List[Instances] = [] + for n, image_size in enumerate(image_sizes): + boxes = Boxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + lvl = level_ids + + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) + if not valid_mask.all(): + if training: + raise FloatingPointError( + "Predicted boxes or scores contain Inf/NaN. Training has diverged." + ) + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + lvl = lvl[valid_mask] + boxes.clip(image_size) + + # filter empty boxes + keep = boxes.nonempty(threshold=min_box_size) + if _is_tracing() or keep.sum().item() != len(boxes): + boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep] + + keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh) + # In Detectron1, there was different behavior during training vs. testing. + # (https://github.com/facebookresearch/Detectron/issues/459) + # During training, topk is over the proposals from *all* images in the training batch. + # During testing, it is over the proposals for each image separately. + # As a result, the training behavior becomes batch-dependent, + # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. + # This bug is addressed in Detectron2 to make the behavior independent of batch size. + keep = keep[:post_nms_topk] # keep is already sorted + + res = Instances(image_size) + res.proposal_boxes = boxes[keep] + res.objectness_logits = scores_per_img[keep] + results.append(res) + return results + + +def add_ground_truth_to_proposals(gt_boxes, proposals): + """ + Call `add_ground_truth_to_proposals_single_image` for all images. + + Args: + gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes + representing the gound-truth for image i. + proposals (list[Instances]): list of N elements. Element i is a Instances + representing the proposals for image i. + + Returns: + list[Instances]: list of N Instances. Each is the proposals for the image, + with field "proposal_boxes" and "objectness_logits". 
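
    Example (illustrative sketch; ``gt_instances`` and ``proposals`` are assumed to
    be the per-image ground-truth ``Instances`` and the RPN outputs, respectively)::

        augmented = add_ground_truth_to_proposals(
            [x.gt_boxes for x in gt_instances], proposals
        )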
+ """ + assert gt_boxes is not None + + assert len(proposals) == len(gt_boxes) + if len(proposals) == 0: + return proposals + + return [ + add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) + for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) + ] + + +def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): + """ + Augment `proposals` with ground-truth boxes from `gt_boxes`. + + Args: + Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals + per image. + + Returns: + Same as `add_ground_truth_to_proposals`, but for only one image. + """ + device = proposals.objectness_logits.device + # Assign all ground-truth boxes an objectness logit corresponding to + # P(object) = sigmoid(logit) =~ 1. + gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) + gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) + + # Concatenating gt_boxes with proposals requires them to have the same fields + gt_proposal = Instances(proposals.image_size) + gt_proposal.proposal_boxes = gt_boxes + gt_proposal.objectness_logits = gt_logits + new_proposals = Instances.cat([proposals, gt_proposal]) + + return new_proposals diff --git a/src/sts/detectron2/modeling/proposal_generator/rpn.py b/src/sts/detectron2/modeling/proposal_generator/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..99cd536d2f9880d2049390c45f73eb22335e1b82 --- /dev/null +++ b/src/sts/detectron2/modeling/proposal_generator/rpn.py @@ -0,0 +1,533 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import Dict, List, Optional, Tuple, Union +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, cat +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage +from detectron2.utils.memory import retry_if_cuda_oom +from detectron2.utils.registry import Registry + +from ..anchor_generator import build_anchor_generator +from ..box_regression import Box2BoxTransform, _dense_box_regression_loss +from ..matcher import Matcher +from ..sampling import subsample_labels +from .build import PROPOSAL_GENERATOR_REGISTRY +from .proposal_utils import find_top_rpn_proposals + +RPN_HEAD_REGISTRY = Registry("RPN_HEAD") +RPN_HEAD_REGISTRY.__doc__ = """ +Registry for RPN heads, which take feature maps and perform +objectness classification and bounding box regression for anchors. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + L: number of feature maps per image on which RPN is run + A: number of cell anchors (must be the same for all feature maps) + Hi, Wi: height and width of the i-th feature map + B: size of the box parameterization + +Naming convention: + + objectness: refers to the binary classification of an anchor as object vs. not object. + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`), or 5d for rotated boxes. + + pred_objectness_logits: predicted objectness scores in [-inf, +inf]; use + sigmoid(pred_objectness_logits) to estimate P(object). 
+ + gt_labels: ground-truth binary classification labels for objectness + + pred_anchor_deltas: predicted box2box transform deltas + + gt_anchor_deltas: ground-truth box2box transform deltas +""" + + +def build_rpn_head(cfg, input_shape): + """ + Build an RPN head defined by `cfg.MODEL.RPN.HEAD_NAME`. + """ + name = cfg.MODEL.RPN.HEAD_NAME + return RPN_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +@RPN_HEAD_REGISTRY.register() +class StandardRPNHead(nn.Module): + """ + Standard RPN classification and regression heads described in :paper:`Faster R-CNN`. + Uses a 3x3 conv to produce a shared hidden state from which one 1x1 conv predicts + objectness logits for each anchor and a second 1x1 conv predicts bounding-box deltas + specifying how to deform each anchor into an object proposal. + """ + + @configurable + def __init__( + self, *, in_channels: int, num_anchors: int, box_dim: int = 4, conv_dims: List[int] = (-1,) + ): + """ + NOTE: this interface is experimental. + + Args: + in_channels (int): number of input feature channels. When using multiple + input features, they must have the same number of channels. + num_anchors (int): number of anchors to predict for *each spatial position* + on the feature map. The total number of anchors for each + feature map will be `num_anchors * H * W`. + box_dim (int): dimension of a box, which is also the number of box regression + predictions to make for each anchor. An axis aligned box has + box_dim=4, while a rotated box has box_dim=5. + conv_dims (list[int]): a list of integers representing the output channels + of N conv layers. Set it to -1 to use the same number of output channels + as input channels. + """ + super().__init__() + cur_channels = in_channels + # Keeping the old variable names and structure for backwards compatiblity. + # Otherwise the old checkpoints will fail to load. + if len(conv_dims) == 1: + out_channels = cur_channels if conv_dims[0] == -1 else conv_dims[0] + # 3x3 conv for the hidden representation + self.conv = self._get_rpn_conv(cur_channels, out_channels) + cur_channels = out_channels + else: + self.conv = nn.Sequential() + for k, conv_dim in enumerate(conv_dims): + out_channels = cur_channels if conv_dim == -1 else conv_dim + if out_channels <= 0: + raise ValueError( + f"Conv output channels should be greater than 0. Got {out_channels}" + ) + conv = self._get_rpn_conv(cur_channels, out_channels) + self.conv.add_module(f"conv{k}", conv) + cur_channels = out_channels + # 1x1 conv for predicting objectness logits + self.objectness_logits = nn.Conv2d(cur_channels, num_anchors, kernel_size=1, stride=1) + # 1x1 conv for predicting box2box transform deltas + self.anchor_deltas = nn.Conv2d(cur_channels, num_anchors * box_dim, kernel_size=1, stride=1) + + # Keeping the order of weights initialization same for backwards compatiblility. + for layer in self.modules(): + if isinstance(layer, nn.Conv2d): + nn.init.normal_(layer.weight, std=0.01) + nn.init.constant_(layer.bias, 0) + + def _get_rpn_conv(self, in_channels, out_channels): + return Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + activation=nn.ReLU(), + ) + + @classmethod + def from_config(cls, cfg, input_shape): + # Standard RPN is shared across levels: + in_channels = [s.channels for s in input_shape] + assert len(set(in_channels)) == 1, "Each level must have the same channel!" 
+ in_channels = in_channels[0] + + # RPNHead should take the same input as anchor generator + # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. + anchor_generator = build_anchor_generator(cfg, input_shape) + num_anchors = anchor_generator.num_anchors + box_dim = anchor_generator.box_dim + assert ( + len(set(num_anchors)) == 1 + ), "Each level must have the same number of anchors per spatial position" + return { + "in_channels": in_channels, + "num_anchors": num_anchors[0], + "box_dim": box_dim, + "conv_dims": cfg.MODEL.RPN.CONV_DIMS, + } + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of feature maps + + Returns: + list[Tensor]: A list of L elements. + Element i is a tensor of shape (N, A, Hi, Wi) representing + the predicted objectness logits for all anchors. A is the number of cell anchors. + list[Tensor]: A list of L elements. Element i is a tensor of shape + (N, A*box_dim, Hi, Wi) representing the predicted "deltas" used to transform anchors + to proposals. + """ + pred_objectness_logits = [] + pred_anchor_deltas = [] + for x in features: + t = self.conv(x) + pred_objectness_logits.append(self.objectness_logits(t)) + pred_anchor_deltas.append(self.anchor_deltas(t)) + return pred_objectness_logits, pred_anchor_deltas + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RPN(nn.Module): + """ + Region Proposal Network, introduced by :paper:`Faster R-CNN`. + """ + + @configurable + def __init__( + self, + *, + in_features: List[str], + head: nn.Module, + anchor_generator: nn.Module, + anchor_matcher: Matcher, + box2box_transform: Box2BoxTransform, + batch_size_per_image: int, + positive_fraction: float, + pre_nms_topk: Tuple[float, float], + post_nms_topk: Tuple[float, float], + nms_thresh: float = 0.7, + min_box_size: float = 0.0, + anchor_boundary_thresh: float = -1.0, + loss_weight: Union[float, Dict[str, float]] = 1.0, + box_reg_loss_type: str = "smooth_l1", + smooth_l1_beta: float = 0.0, + ): + """ + NOTE: this interface is experimental. + + Args: + in_features (list[str]): list of names of input features to use + head (nn.Module): a module that predicts logits and regression deltas + for each level from a list of per-level features + anchor_generator (nn.Module): a module that creates anchors from a + list of features. Usually an instance of :class:`AnchorGenerator` + anchor_matcher (Matcher): label the anchors by matching them with ground truth. + box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to + instance boxes + batch_size_per_image (int): number of anchors per image to sample for training + positive_fraction (float): fraction of foreground anchors to sample for training + pre_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select before NMS, in + training and testing. + post_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select after NMS, in + training and testing. + nms_thresh (float): NMS threshold used to de-duplicate the predicted proposals + min_box_size (float): remove proposal boxes with any side smaller than this threshold, + in the unit of input image pixels + anchor_boundary_thresh (float): legacy option + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all rpn losses together, or a dict of individual weightings. 
Valid dict keys are: + "loss_rpn_cls" - applied to classification loss + "loss_rpn_loc" - applied to box regression loss + box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou". + smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to + use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1" + """ + super().__init__() + self.in_features = in_features + self.rpn_head = head + self.anchor_generator = anchor_generator + self.anchor_matcher = anchor_matcher + self.box2box_transform = box2box_transform + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + # Map from self.training state to train/test settings + self.pre_nms_topk = {True: pre_nms_topk[0], False: pre_nms_topk[1]} + self.post_nms_topk = {True: post_nms_topk[0], False: post_nms_topk[1]} + self.nms_thresh = nms_thresh + self.min_box_size = float(min_box_size) + self.anchor_boundary_thresh = anchor_boundary_thresh + if isinstance(loss_weight, float): + loss_weight = {"loss_rpn_cls": loss_weight, "loss_rpn_loc": loss_weight} + self.loss_weight = loss_weight + self.box_reg_loss_type = box_reg_loss_type + self.smooth_l1_beta = smooth_l1_beta + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + in_features = cfg.MODEL.RPN.IN_FEATURES + ret = { + "in_features": in_features, + "min_box_size": cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE, + "nms_thresh": cfg.MODEL.RPN.NMS_THRESH, + "batch_size_per_image": cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.RPN.POSITIVE_FRACTION, + "loss_weight": { + "loss_rpn_cls": cfg.MODEL.RPN.LOSS_WEIGHT, + "loss_rpn_loc": cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT, + }, + "anchor_boundary_thresh": cfg.MODEL.RPN.BOUNDARY_THRESH, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS), + "box_reg_loss_type": cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE, + "smooth_l1_beta": cfg.MODEL.RPN.SMOOTH_L1_BETA, + } + + ret["pre_nms_topk"] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, cfg.MODEL.RPN.PRE_NMS_TOPK_TEST) + ret["post_nms_topk"] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, cfg.MODEL.RPN.POST_NMS_TOPK_TEST) + + ret["anchor_generator"] = build_anchor_generator(cfg, [input_shape[f] for f in in_features]) + ret["anchor_matcher"] = Matcher( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True + ) + ret["head"] = build_rpn_head(cfg, [input_shape[f] for f in in_features]) + return ret + + def _subsample_labels(self, label): + """ + Randomly sample a subset of positive and negative examples, and overwrite + the label vector to the ignore value (-1) for all elements that are not + included in the sample. + + Args: + labels (Tensor): a vector of -1, 0, 1. Will be modified in-place and returned. + """ + pos_idx, neg_idx = subsample_labels( + label, self.batch_size_per_image, self.positive_fraction, 0 + ) + # Fill with the ignore label (-1), then set positive and negative labels + label.fill_(-1) + label.scatter_(0, pos_idx, 1) + label.scatter_(0, neg_idx, 0) + return label + + @torch.jit.unused + @torch.no_grad() + def label_and_sample_anchors( + self, anchors: List[Boxes], gt_instances: List[Instances] + ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + """ + Args: + anchors (list[Boxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. 
i-th element is a vector of labels whose length is + the total number of anchors across all feature maps R = sum(Hi * Wi * A). + Label values are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative + class; 1 = positive class. + list[Tensor]: + i-th element is a Rx4 tensor. The values are the matched gt boxes for each + anchor. Values are undefined for those anchors not labeled as 1. + """ + anchors = Boxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + image_sizes = [x.image_size for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes): + """ + image_size_i: (h, w) for the i-th image + gt_boxes_i: ground-truth boxes for i-th image + """ + + match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + del match_quality_matrix + + if self.anchor_boundary_thresh >= 0: + # Discard anchors that go out of the boundaries of the image + # NOTE: This is legacy functionality that is turned off by default in Detectron2 + anchors_inside_image = anchors.inside_box(image_size_i, self.anchor_boundary_thresh) + gt_labels_i[~anchors_inside_image] = -1 + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.jit.unused + def losses( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + gt_labels: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + gt_boxes: List[torch.Tensor], + ) -> Dict[str, torch.Tensor]: + """ + Return the losses from a set of RPN predictions and their associated ground-truth. + + Args: + anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each + has shape (Hi*Wi*A, B), where B is box dimension (4 or 5). + pred_objectness_logits (list[Tensor]): A list of L elements. + Element i is a tensor of shape (N, Hi*Wi*A) representing + the predicted objectness logits for all anchors. + gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape + (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors + to proposals. + gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + + Returns: + dict[loss name -> loss value]: A dict mapping from loss name to loss value. + Loss names are: `loss_rpn_cls` for objectness classification and + `loss_rpn_loc` for proposal localization. 
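
        The classification term is summed over the sampled anchors and the
        localization term over the positive anchors; both sums are divided by the
        same normalizer, ``batch_size_per_image * num_images`` (for example, with
        ``batch_size_per_image=256`` and a 2-image batch, each sum is divided by
        256 * 2 = 512).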
+ """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, sum(Hi*Wi*Ai)) + + # Log the number of positive/negative anchors per-image that's used in training + pos_mask = gt_labels == 1 + num_pos_anchors = pos_mask.sum().item() + num_neg_anchors = (gt_labels == 0).sum().item() + storage = get_event_storage() + storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images) + storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images) + + localization_loss = _dense_box_regression_loss( + anchors, + self.box2box_transform, + pred_anchor_deltas, + gt_boxes, + pos_mask, + box_reg_loss_type=self.box_reg_loss_type, + smooth_l1_beta=self.smooth_l1_beta, + ) + + valid_mask = gt_labels >= 0 + objectness_loss = F.binary_cross_entropy_with_logits( + cat(pred_objectness_logits, dim=1)[valid_mask], + gt_labels[valid_mask].to(torch.float32), + reduction="sum", + ) + normalizer = self.batch_size_per_image * num_images + losses = { + "loss_rpn_cls": objectness_loss / normalizer, + # The original Faster R-CNN paper uses a slightly different normalizer + # for loc loss. But it doesn't matter in practice + "loss_rpn_loc": localization_loss / normalizer, + } + losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + return losses + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + gt_instances: Optional[List[Instances]] = None, + ): + """ + Args: + images (ImageList): input images of length `N` + features (dict[str, Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + gt_instances (list[Instances], optional): a length `N` list of `Instances`s. + Each `Instances` stores ground-truth instances for the corresponding image. + + Returns: + proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits" + loss: dict[Tensor] or None + """ + features = [features[f] for f in self.in_features] + anchors = self.anchor_generator(features) + + pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) + # Transpose the Hi*Wi*A dimension to the middle: + pred_objectness_logits = [ + # (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) + score.permute(0, 2, 3, 1).flatten(1) + for score in pred_objectness_logits + ] + pred_anchor_deltas = [ + # (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) -> (N, Hi*Wi*A, B) + x.view(x.shape[0], -1, self.anchor_generator.box_dim, x.shape[-2], x.shape[-1]) + .permute(0, 3, 4, 1, 2) + .flatten(1, -2) + for x in pred_anchor_deltas + ] + + if self.training: + assert gt_instances is not None, "RPN requires gt_instances in training!" + gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) + losses = self.losses( + anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes + ) + else: + losses = {} + proposals = self.predict_proposals( + anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes + ) + return proposals, losses + + def predict_proposals( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + ): + """ + Decode all the predicted box regression deltas to proposals. Find the top proposals + by applying NMS and removing boxes that are too small. + + Returns: + proposals (list[Instances]): list of N Instances. 
The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. + """ + # The proposals are treated as fixed for joint training with roi heads. + # This approach ignores the derivative w.r.t. the proposal boxes’ coordinates that + # are also network responses. + with torch.no_grad(): + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + self.pre_nms_topk[self.training], + self.post_nms_topk[self.training], + self.min_box_size, + self.training, + ) + + def _decode_proposals(self, anchors: List[Boxes], pred_anchor_deltas: List[torch.Tensor]): + """ + Transform anchors into proposals by applying the predicted anchor deltas. + + Returns: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape + (N, Hi*Wi*A, B) + """ + N = pred_anchor_deltas[0].shape[0] + proposals = [] + # For each feature map + for anchors_i, pred_anchor_deltas_i in zip(anchors, pred_anchor_deltas): + B = anchors_i.tensor.size(1) + pred_anchor_deltas_i = pred_anchor_deltas_i.reshape(-1, B) + # Expand anchors to shape (N*Hi*Wi*A, B) + anchors_i = anchors_i.tensor.unsqueeze(0).expand(N, -1, -1).reshape(-1, B) + proposals_i = self.box2box_transform.apply_deltas(pred_anchor_deltas_i, anchors_i) + # Append feature map proposals with shape (N, Hi*Wi*A, B) + proposals.append(proposals_i.view(N, -1, B)) + return proposals diff --git a/src/sts/detectron2/modeling/proposal_generator/rrpn.py b/src/sts/detectron2/modeling/proposal_generator/rrpn.py new file mode 100644 index 0000000000000000000000000000000000000000..6ee4d8fd70430c5242cc02a1df8400493ffd75b7 --- /dev/null +++ b/src/sts/detectron2/modeling/proposal_generator/rrpn.py @@ -0,0 +1,203 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +import logging +from typing import Dict, List +import torch + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, batched_nms_rotated, cat +from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.memory import retry_if_cuda_oom + +from ..box_regression import Box2BoxTransformRotated +from .build import PROPOSAL_GENERATOR_REGISTRY +from .rpn import RPN + +logger = logging.getLogger(__name__) + + +def find_top_rrpn_proposals( + proposals, + pred_objectness_logits, + image_sizes, + nms_thresh, + pre_nms_topk, + post_nms_topk, + min_box_size, + training, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps if `training` is True, + otherwise, returns the highest `post_nms_topk` scoring proposals for each + feature map. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). + image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RRPN is run on multiple feature maps (as in FPN) this number is per + feature map. + post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. 
+ When RRPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. + min_box_size(float): minimum proposal box side length in pixels (absolute units wrt + input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + proposals (list[Instances]): list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i. + """ + num_images = len(image_sizes) + device = proposals[0].device + + # 1. Select top-k anchor for every level and every image + topk_scores = [] # #lvl Tensor, each of shape N x topk + topk_proposals = [] + level_ids = [] # #lvl Tensor, each of shape (topk,) + batch_idx = torch.arange(num_images, device=device) + for level_id, proposals_i, logits_i in zip( + itertools.count(), proposals, pred_objectness_logits + ): + Hi_Wi_A = logits_i.shape[1] + num_proposals_i = min(pre_nms_topk, Hi_Wi_A) + + # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) + # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) + logits_i, idx = logits_i.sort(descending=True, dim=1) + topk_scores_i = logits_i[batch_idx, :num_proposals_i] + topk_idx = idx[batch_idx, :num_proposals_i] + + # each is N x topk + topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 5 + + topk_proposals.append(topk_proposals_i) + topk_scores.append(topk_scores_i) + level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) + + # 2. Concat all levels together + topk_scores = cat(topk_scores, dim=1) + topk_proposals = cat(topk_proposals, dim=1) + level_ids = cat(level_ids, dim=0) + + # 3. For each image, run a per-level NMS, and choose topk results. + results = [] + for n, image_size in enumerate(image_sizes): + boxes = RotatedBoxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + boxes.clip(image_size) + + # filter empty boxes + keep = boxes.nonempty(threshold=min_box_size) + lvl = level_ids + if keep.sum().item() != len(boxes): + boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], level_ids[keep]) + + keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh) + # In Detectron1, there was different behavior during training vs. testing. + # (https://github.com/facebookresearch/Detectron/issues/459) + # During training, topk is over the proposals from *all* images in the training batch. + # During testing, it is over the proposals for each image separately. + # As a result, the training behavior becomes batch-dependent, + # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. + # This bug is addressed in Detectron2 to make the behavior independent of batch size. + keep = keep[:post_nms_topk] + + res = Instances(image_size) + res.proposal_boxes = boxes[keep] + res.objectness_logits = scores_per_img[keep] + results.append(res) + return results + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RRPN(RPN): + """ + Rotated Region Proposal Network described in :paper:`RRPN`. 
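
    Proposals are parameterized as rotated boxes ``(x_ctr, y_ctr, width, height,
    angle_degrees)``. Apart from the rotated box transform, rotated IoU matching
    and rotated NMS used here, training and inference follow :class:`RPN`.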
+ """ + + @configurable + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if self.anchor_boundary_thresh >= 0: + raise NotImplementedError( + "anchor_boundary_thresh is a legacy option not implemented for RRPN." + ) + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + ret = super().from_config(cfg, input_shape) + ret["box2box_transform"] = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) + return ret + + @torch.no_grad() + def label_and_sample_anchors(self, anchors: List[RotatedBoxes], gt_instances: List[Instances]): + """ + Args: + anchors (list[RotatedBoxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across feature maps. Label values are in {-1, 0, 1}, + with meanings: -1 = ignore; 0 = negative class; 1 = positive class. + list[Tensor]: + i-th element is a Nx5 tensor, where N is the total number of anchors across + feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as 1. + """ + anchors = RotatedBoxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for gt_boxes_i in gt_boxes: + """ + gt_boxes_i: ground-truth boxes for i-th image + """ + match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.no_grad() + def predict_proposals(self, anchors, pred_objectness_logits, pred_anchor_deltas, image_sizes): + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rrpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + self.pre_nms_topk[self.training], + self.post_nms_topk[self.training], + self.min_box_size, + self.training, + ) diff --git a/src/sts/detectron2/modeling/roi_heads/__init__.py b/src/sts/detectron2/modeling/roi_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..35b98746c1e2510dfcfebccbed4c72babb61925b --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
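# ROI head components: the box/keypoint/mask heads, the Standard/Res5/Rotated/
# Cascade ROI heads, and the Fast R-CNN output layers, re-exported for convenience.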
+from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead +from .keypoint_head import ( + ROI_KEYPOINT_HEAD_REGISTRY, + build_keypoint_head, + BaseKeypointRCNNHead, + KRCNNConvDeconvUpsampleHead, +) +from .mask_head import ( + ROI_MASK_HEAD_REGISTRY, + build_mask_head, + BaseMaskRCNNHead, + MaskRCNNConvUpsampleHead, +) +from .roi_heads import ( + ROI_HEADS_REGISTRY, + ROIHeads, + Res5ROIHeads, + StandardROIHeads, + build_roi_heads, + select_foreground_proposals, +) +from .rotated_fast_rcnn import RROIHeads +from .fast_rcnn import FastRCNNOutputLayers + +from . import cascade_rcnn # isort:skip + +__all__ = list(globals().keys()) diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/modeling/roi_heads/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bde758276da61fad51910815cadfb88c766d4457 Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/box_head.cpython-38.pyc b/src/sts/detectron2/modeling/roi_heads/__pycache__/box_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8c268ceb823a5093d208077f53f4d7d431d2f08 Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/box_head.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/cascade_rcnn.cpython-38.pyc b/src/sts/detectron2/modeling/roi_heads/__pycache__/cascade_rcnn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ceda1bfa50e202b2d5122a3101ddf3603e70b7a Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/cascade_rcnn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/fast_rcnn.cpython-38.pyc b/src/sts/detectron2/modeling/roi_heads/__pycache__/fast_rcnn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45d60cb98777782f527bfb3174472ac535534542 Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/fast_rcnn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/keypoint_head.cpython-38.pyc b/src/sts/detectron2/modeling/roi_heads/__pycache__/keypoint_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..694af5d52507ddb49eb7f2423a997f1fe87343c6 Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/keypoint_head.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/mask_head.cpython-38.pyc b/src/sts/detectron2/modeling/roi_heads/__pycache__/mask_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b2d6359a115a1cb1518ca21356d7b07319bc10b Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/mask_head.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/roi_heads.cpython-38.pyc b/src/sts/detectron2/modeling/roi_heads/__pycache__/roi_heads.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fa9b477d36d817281a4eb93f3bfdcd82fda2d40 Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/roi_heads.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/__pycache__/rotated_fast_rcnn.cpython-38.pyc 
b/src/sts/detectron2/modeling/roi_heads/__pycache__/rotated_fast_rcnn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5b2e48f78df61045cac71ab9bf282f2504ac917 Binary files /dev/null and b/src/sts/detectron2/modeling/roi_heads/__pycache__/rotated_fast_rcnn.cpython-38.pyc differ diff --git a/src/sts/detectron2/modeling/roi_heads/box_head.py b/src/sts/detectron2/modeling/roi_heads/box_head.py new file mode 100644 index 0000000000000000000000000000000000000000..5d0370b0400d9268f13c905e4096a84ce42e9bfd --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/box_head.py @@ -0,0 +1,118 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, get_norm +from detectron2.utils.registry import Registry + +__all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] + +ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") +ROI_BOX_HEAD_REGISTRY.__doc__ = """ +Registry for box heads, which make box predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_BOX_HEAD_REGISTRY.register() +class FastRCNNConvFCHead(nn.Sequential): + """ + A head with several 3x3 conv layers (each followed by norm & relu) and then + several fc layers (each followed by relu). + """ + + @configurable + def __init__( + self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature. + conv_dims (list[int]): the output dimensions of the conv layers + fc_dims (list[int]): the output dimensions of the fc layers + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. 
+ """ + super().__init__() + assert len(conv_dims) + len(fc_dims) > 0 + + self._output_size = (input_shape.channels, input_shape.height, input_shape.width) + + self.conv_norm_relus = [] + for k, conv_dim in enumerate(conv_dims): + conv = Conv2d( + self._output_size[0], + conv_dim, + kernel_size=3, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=nn.ReLU(), + ) + self.add_module("conv{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) + + self.fcs = [] + for k, fc_dim in enumerate(fc_dims): + if k == 0: + self.add_module("flatten", nn.Flatten()) + fc = nn.Linear(int(np.prod(self._output_size)), fc_dim) + self.add_module("fc{}".format(k + 1), fc) + self.add_module("fc_relu{}".format(k + 1), nn.ReLU()) + self.fcs.append(fc) + self._output_size = fc_dim + + for layer in self.conv_norm_relus: + weight_init.c2_msra_fill(layer) + for layer in self.fcs: + weight_init.c2_xavier_fill(layer) + + @classmethod + def from_config(cls, cfg, input_shape): + num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV + conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM + num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC + fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM + return { + "input_shape": input_shape, + "conv_dims": [conv_dim] * num_conv, + "fc_dims": [fc_dim] * num_fc, + "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, + } + + def forward(self, x): + for layer in self: + x = layer(x) + return x + + @property + @torch.jit.unused + def output_shape(self): + """ + Returns: + ShapeSpec: the output feature shape + """ + o = self._output_size + if isinstance(o, int): + return ShapeSpec(channels=o) + else: + return ShapeSpec(channels=o[0], height=o[1], width=o[2]) + + +def build_box_head(cfg, input_shape): + """ + Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_BOX_HEAD.NAME + return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/src/sts/detectron2/modeling/roi_heads/cascade_rcnn.py b/src/sts/detectron2/modeling/roi_heads/cascade_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..4f6bfab22ed9c30a98f27b849a7fa3e210ba9cf2 --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/cascade_rcnn.py @@ -0,0 +1,298 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch +from torch import nn +from torch.autograd.function import Function + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec +from detectron2.structures import Boxes, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransform +from ..matcher import Matcher +from ..poolers import ROIPooler +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + + +class _ScaleGradient(Function): + @staticmethod + def forward(ctx, input, scale): + ctx.scale = scale + return input + + @staticmethod + def backward(ctx, grad_output): + return grad_output * ctx.scale, None + + +@ROI_HEADS_REGISTRY.register() +class CascadeROIHeads(StandardROIHeads): + """ + Implement :paper:`Cascade R-CNN`. + """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_heads: List[nn.Module], + box_predictors: List[nn.Module], + proposal_matchers: List[Matcher], + **kwargs, + ): + """ + NOTE: this interface is experimental. 
+ + Args: + box_pooler (ROIPooler): pooler that extracts region features from given boxes + box_heads (list[nn.Module]): box head for each cascade stage + box_predictors (list[nn.Module]): box predictor for each cascade stage + proposal_matchers (list[Matcher]): matcher with different IoU thresholds to + match boxes with ground truth for each stage. The first matcher matches + RPN proposals with ground truth, the other matchers use boxes predicted + by the previous stage as proposals and match them with ground truth. + """ + assert "proposal_matcher" not in kwargs, ( + "CascadeROIHeads takes 'proposal_matchers=' for each stage instead " + "of one 'proposal_matcher='." + ) + # The first matcher matches RPN proposals with ground truth, done in the base class + kwargs["proposal_matcher"] = proposal_matchers[0] + num_stages = self.num_cascade_stages = len(box_heads) + box_heads = nn.ModuleList(box_heads) + box_predictors = nn.ModuleList(box_predictors) + assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!" + assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!" + super().__init__( + box_in_features=box_in_features, + box_pooler=box_pooler, + box_head=box_heads, + box_predictor=box_predictors, + **kwargs, + ) + self.proposal_matchers = proposal_matchers + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret.pop("proposal_matcher") + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS + cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS + assert len(cascade_bbox_reg_weights) == len(cascade_ious) + assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \ + "CascadeROIHeads only support class-agnostic regression now!" 
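In `_init_box_head` above, each cascade stage pairs one IoU matching threshold with one set of box-regression weights via `zip()`. The snippet below is only a rough illustration of that pairing; the numeric values are commonly used Cascade R-CNN defaults assumed for the example, not values read from this repository's configs.

```python
# Rough sketch, not part of this diff: per-stage IoU thresholds paired with
# per-stage box2box regression weights, as _init_box_head does with zip().
cascade_ious = (0.5, 0.6, 0.7)                     # assumed typical defaults
cascade_bbox_reg_weights = (
    (10.0, 10.0, 5.0, 5.0),
    (20.0, 20.0, 10.0, 10.0),
    (30.0, 30.0, 15.0, 15.0),
)

for stage, (iou, weights) in enumerate(zip(cascade_ious, cascade_bbox_reg_weights)):
    # Stage 0 matches RPN proposals against ground truth; later stages re-match the
    # boxes predicted by the previous stage using a progressively stricter IoU.
    print(f"stage {stage}: match threshold {iou}, box2box weights {weights}")
```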
+ assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0] + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + pooled_shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + + box_heads, box_predictors, proposal_matchers = [], [], [] + for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights): + box_head = build_box_head(cfg, pooled_shape) + box_heads.append(box_head) + box_predictors.append( + FastRCNNOutputLayers( + cfg, + box_head.output_shape, + box2box_transform=Box2BoxTransform(weights=bbox_reg_weights), + ) + ) + proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False)) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_heads": box_heads, + "box_predictors": box_predictors, + "proposal_matchers": proposal_matchers, + } + + def forward(self, images, features, proposals, targets=None): + del images + if self.training: + proposals = self.label_and_sample_proposals(proposals, targets) + + if self.training: + # Need targets to box head + losses = self._forward_box(features, proposals, targets) + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def _forward_box(self, features, proposals, targets=None): + """ + Args: + features, targets: the same as in + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + """ + features = [features[f] for f in self.box_in_features] + head_outputs = [] # (predictor, predictions, proposals) + prev_pred_boxes = None + image_sizes = [x.image_size for x in proposals] + for k in range(self.num_cascade_stages): + if k > 0: + # The output boxes of the previous stage are used to create the input + # proposals of the next stage. + proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes) + if self.training: + proposals = self._match_and_label_boxes(proposals, k, targets) + predictions = self._run_stage(features, proposals, k) + prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals) + head_outputs.append((self.box_predictor[k], predictions, proposals)) + + if self.training: + losses = {} + storage = get_event_storage() + for stage, (predictor, predictions, proposals) in enumerate(head_outputs): + with storage.name_scope("stage{}".format(stage)): + stage_losses = predictor.losses(predictions, proposals) + losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()}) + return losses + else: + # Each is a list[Tensor] of length #image. 
Each tensor is Ri x (K+1) + scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs] + + # Average the scores across heads + scores = [ + sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages) + for scores_per_image in zip(*scores_per_stage) + ] + # Use the boxes of the last head + predictor, predictions, proposals = head_outputs[-1] + boxes = predictor.predict_boxes(predictions, proposals) + pred_instances, _ = fast_rcnn_inference( + boxes, + scores, + image_sizes, + predictor.test_score_thresh, + predictor.test_nms_thresh, + predictor.test_topk_per_image, + ) + return pred_instances + + @torch.no_grad() + def _match_and_label_boxes(self, proposals, stage, targets): + """ + Match proposals with groundtruth using the matcher at the given stage. + Label the proposals as foreground or background based on the match. + + Args: + proposals (list[Instances]): One Instances for each image, with + the field "proposal_boxes". + stage (int): the current stage + targets (list[Instances]): the ground truth instances + + Returns: + list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes" + """ + num_fg_samples, num_bg_samples = [], [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + # proposal_labels are 0 or 1 + matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix) + if len(targets_per_image) > 0: + gt_classes = targets_per_image.gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[proposal_labels == 0] = self.num_classes + gt_boxes = targets_per_image.gt_boxes[matched_idxs] + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + gt_boxes = Boxes( + targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)) + ) + proposals_per_image.gt_classes = gt_classes + proposals_per_image.gt_boxes = gt_boxes + + num_fg_samples.append((proposal_labels == 1).sum().item()) + num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1]) + + # Log the number of fg/bg samples in each stage + storage = get_event_storage() + storage.put_scalar( + "stage{}/roi_head/num_fg_samples".format(stage), + sum(num_fg_samples) / len(num_fg_samples), + ) + storage.put_scalar( + "stage{}/roi_head/num_bg_samples".format(stage), + sum(num_bg_samples) / len(num_bg_samples), + ) + return proposals + + def _run_stage(self, features, proposals, stage): + """ + Args: + features (list[Tensor]): #lvl input features to ROIHeads + proposals (list[Instances]): #image Instances, with the field "proposal_boxes" + stage (int): the current stage + + Returns: + Same output as `FastRCNNOutputLayers.forward()`. + """ + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + # The original implementation averages the losses among heads, + # but scale up the parameter gradients of the heads. + # This is equivalent to adding the losses among heads, + # but scale down the gradients on features. 
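The comment above, together with the `_ScaleGradient.apply` call that follows it, sums the per-stage losses while dividing the gradient that reaches the shared region features by the number of stages. A standalone toy check of that equivalence (not part of the diff, using a toy tensor) could look like this:

```python
# Minimal sketch, not part of this diff: a gradient-scaling autograd Function in
# the spirit of _ScaleGradient. Forward is the identity; backward multiplies the
# incoming gradient by a constant factor.
import torch
from torch.autograd.function import Function


class ScaleGradient(Function):
    @staticmethod
    def forward(ctx, x, scale):
        ctx.scale = scale
        return x

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output * ctx.scale, None


x = torch.ones(3, requires_grad=True)
num_stages = 3
y = ScaleGradient.apply(x, 1.0 / num_stages)
(y.sum() * num_stages).backward()   # simulate summing three equally weighted stage losses
print(x.grad)                       # matches a single unscaled stage: all ones
```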
+ box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages) + box_features = self.box_head[stage](box_features) + return self.box_predictor[stage](box_features) + + def _create_proposals_from_boxes(self, boxes, image_sizes): + """ + Args: + boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4 + image_sizes (list[tuple]): list of image shapes in (h, w) + + Returns: + list[Instances]: per-image proposals with the given boxes. + """ + # Just like RPN, the proposals should not have gradients + boxes = [Boxes(b.detach()) for b in boxes] + proposals = [] + for boxes_per_image, image_size in zip(boxes, image_sizes): + boxes_per_image.clip(image_size) + if self.training: + # do not filter empty boxes at inference time, + # because the scores from each stage need to be aligned and added later + boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] + prop = Instances(image_size) + prop.proposal_boxes = boxes_per_image + proposals.append(prop) + return proposals diff --git a/src/sts/detectron2/modeling/roi_heads/fast_rcnn.py b/src/sts/detectron2/modeling/roi_heads/fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..348f6a09782a9d686f91f28eefe1d8d5b6df939d --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/fast_rcnn.py @@ -0,0 +1,622 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +from typing import Dict, List, Tuple, Union +import torch +from fvcore.nn import giou_loss, smooth_l1_loss +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, batched_nms, cat, cross_entropy, nonzero_tuple +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.structures import Boxes, Instances +from detectron2.utils.events import get_event_storage + +__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"] + + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). + + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. + + pred_proposal_deltas: predicted box2box transform deltas for transforming proposals + to detection box predictions. + + gt_proposal_deltas: ground-truth box2box transform deltas +""" + + +def fast_rcnn_inference( + boxes: List[torch.Tensor], + scores: List[torch.Tensor], + image_shapes: List[Tuple[int, int]], + score_thresh: float, + nms_thresh: float, + topk_per_image: int, +): + """ + Call `fast_rcnn_inference_single_image` for all images. + + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 4) if doing + class-specific regression, or (Ri, 4) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. + This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. 
+ scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. + kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +def _log_classification_stats(pred_logits, gt_classes, prefix="fast_rcnn"): + """ + Log the classification metrics to EventStorage. + + Args: + pred_logits: Rx(K+1) logits. The last column is for background class. + gt_classes: R labels + """ + num_instances = gt_classes.numel() + if num_instances == 0: + return + pred_classes = pred_logits.argmax(dim=1) + bg_class_ind = pred_logits.shape[1] - 1 + + fg_inds = (gt_classes >= 0) & (gt_classes < bg_class_ind) + num_fg = fg_inds.nonzero().numel() + fg_gt_classes = gt_classes[fg_inds] + fg_pred_classes = pred_classes[fg_inds] + + num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() + num_accurate = (pred_classes == gt_classes).nonzero().numel() + fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() + + storage = get_event_storage() + storage.put_scalar(f"{prefix}/cls_accuracy", num_accurate / num_instances) + if num_fg > 0: + storage.put_scalar(f"{prefix}/fg_cls_accuracy", fg_num_accurate / num_fg) + storage.put_scalar(f"{prefix}/false_negative", num_false_negative / num_fg) + + +def fast_rcnn_inference_single_image( + boxes, + scores, + image_shape: Tuple[int, int], + score_thresh: float, + nms_thresh: float, + topk_per_image: int, +): + """ + Single-image inference. Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Args: + Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference`, but for only one image. + """ + valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // 4 + # Convert to Boxes to use the `clip` function ... + boxes = Boxes(boxes.reshape(-1, 4)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 + + # 1. Filter results based on detection scores. It can make NMS more efficient + # by filtering out low-confidence detections. + filter_mask = scores > score_thresh # R x K + # R' x 2. 
First column contains indices of the R predictions; + # Second column contains indices of classes. + filter_inds = filter_mask.nonzero() + if num_bbox_reg_classes == 1: + boxes = boxes[filter_inds[:, 0], 0] + else: + boxes = boxes[filter_mask] + scores = scores[filter_mask] + + # 2. Apply NMS for each class independently. + keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) + if topk_per_image >= 0: + keep = keep[:topk_per_image] + boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] + + result = Instances(image_shape) + result.pred_boxes = Boxes(boxes) + result.scores = scores + result.pred_classes = filter_inds[:, 1] + return result, filter_inds[:, 0] + + +class FastRCNNOutputs: + """ + An internal implementation that stores information about outputs of a Fast R-CNN head, + and provides methods that are used to decode the outputs of a Fast R-CNN head. + """ + + def __init__( + self, + box2box_transform, + pred_class_logits, + pred_proposal_deltas, + proposals, + smooth_l1_beta=0.0, + box_reg_loss_type="smooth_l1", + ): + """ + Args: + box2box_transform (Box2BoxTransform/Box2BoxTransformRotated): + box2box transform instance for proposal-to-detection transformations. + pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class + logits for all R predicted object instances. + Each row corresponds to a predicted object instance. + pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for + class-specific or class-agnostic regression. It stores the predicted deltas that + transform proposals into final box detections. + B is the box dimension (4 or 5). + When B is 4, each row is [dx, dy, dw, dh (, ....)]. + When B is 5, each row is [dx, dy, dw, dh, da (, ....)]. + proposals (list[Instances]): A list of N Instances, where Instances i stores the + proposals for image i, in the field "proposal_boxes". + When training, each Instances must have ground-truth labels + stored in the field "gt_classes" and "gt_boxes". + The total number of all instances must be equal to R. + smooth_l1_beta (float): The transition point between L1 and L2 loss in + the smooth L1 loss function. When set to 0, the loss becomes L1. When + set to +inf, the loss becomes constant 0. + box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou" + """ + self.box2box_transform = box2box_transform + self.num_preds_per_image = [len(p) for p in proposals] + self.pred_class_logits = pred_class_logits + self.pred_proposal_deltas = pred_proposal_deltas + self.smooth_l1_beta = smooth_l1_beta + self.box_reg_loss_type = box_reg_loss_type + + self.image_shapes = [x.image_size for x in proposals] + + if len(proposals): + box_type = type(proposals[0].proposal_boxes) + # cat(..., dim=0) concatenates over all images in the batch + self.proposals = box_type.cat([p.proposal_boxes for p in proposals]) + assert ( + not self.proposals.tensor.requires_grad + ), "Proposals should not require gradients!" + + # "gt_classes" exists if and only if training. But other gt fields may + # not necessarily exist in training for images that have no groundtruth. + if proposals[0].has("gt_classes"): + self.gt_classes = cat([p.gt_classes for p in proposals], dim=0) + + # If "gt_boxes" does not exist, the proposals must be all negative and + # should not be included in regression loss computation. + # Here we just use proposal_boxes as an arbitrary placeholder because its + # value won't be used in self.box_reg_loss(). 
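Stepping back to the per-image inference path defined above (`fast_rcnn_inference_single_image`): it filters detections by score, applies class-aware NMS, then keeps the top-k survivors. The toy example below is not part of the diff; it replays those three steps with torchvision's `batched_nms` as a stand-in for `detectron2.layers.batched_nms`, and every tensor is made up.

```python
# Toy sketch, not part of this diff: score filtering -> class-aware NMS -> top-k.
import torch
from torchvision.ops import batched_nms  # stand-in for detectron2.layers.batched_nms

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0], [1.0, 1.0, 11.0, 11.0], [50.0, 50.0, 60.0, 60.0]])
scores = torch.tensor([0.9, 0.85, 0.3])
classes = torch.tensor([1, 1, 0])

keep_score = scores > 0.5                                        # 1. drop low-confidence boxes
boxes, scores, classes = boxes[keep_score], scores[keep_score], classes[keep_score]

keep = batched_nms(boxes, scores, classes, iou_threshold=0.5)    # 2. NMS within each class
keep = keep[:100]                                                # 3. cap detections per image
print(boxes[keep], scores[keep], classes[keep])
```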
+ gt_boxes = [ + p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes for p in proposals + ] + self.gt_boxes = box_type.cat(gt_boxes) + else: + self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device)) + self._no_instances = len(self.proposals) == 0 # no instances found + + def softmax_cross_entropy_loss(self): + """ + Deprecated + """ + _log_classification_stats(self.pred_class_logits, self.gt_classes) + return cross_entropy(self.pred_class_logits, self.gt_classes, reduction="mean") + + def box_reg_loss(self): + """ + Deprecated + """ + if self._no_instances: + return 0.0 * self.pred_proposal_deltas.sum() + + box_dim = self.proposals.tensor.size(1) # 4 or 5 + cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim + device = self.pred_proposal_deltas.device + + bg_class_ind = self.pred_class_logits.shape[1] - 1 + # Box delta loss is only computed between the prediction for the gt class k + # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions + # for non-gt classes and background. + # Empty fg_inds should produce a valid loss of zero because reduction=sum. + fg_inds = nonzero_tuple((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind))[0] + + if cls_agnostic_bbox_reg: + # pred_proposal_deltas only corresponds to foreground class for agnostic + gt_class_cols = torch.arange(box_dim, device=device) + else: + # pred_proposal_deltas for class k are located in columns [b * k : b * k + b], + # where b is the dimension of box representation (4 or 5) + # Note that compared to Detectron1, + # we do not perform bounding box regression for background classes. + gt_class_cols = box_dim * self.gt_classes[fg_inds, None] + torch.arange( + box_dim, device=device + ) + + if self.box_reg_loss_type == "smooth_l1": + gt_proposal_deltas = self.box2box_transform.get_deltas( + self.proposals.tensor, self.gt_boxes.tensor + ) + loss_box_reg = smooth_l1_loss( + self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], + gt_proposal_deltas[fg_inds], + self.smooth_l1_beta, + reduction="sum", + ) + elif self.box_reg_loss_type == "giou": + fg_pred_boxes = self.box2box_transform.apply_deltas( + self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], + self.proposals.tensor[fg_inds], + ) + loss_box_reg = giou_loss( + fg_pred_boxes, + self.gt_boxes.tensor[fg_inds], + reduction="sum", + ) + else: + raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'") + + loss_box_reg = loss_box_reg / self.gt_classes.numel() + return loss_box_reg + + def losses(self): + """ + Deprecated + """ + return {"loss_cls": self.softmax_cross_entropy_loss(), "loss_box_reg": self.box_reg_loss()} + + def predict_boxes(self): + """ + Deprecated + """ + pred = self.box2box_transform.apply_deltas(self.pred_proposal_deltas, self.proposals.tensor) + return pred.split(self.num_preds_per_image, dim=0) + + def predict_probs(self): + """ + Deprecated + """ + probs = F.softmax(self.pred_class_logits, dim=-1) + return probs.split(self.num_preds_per_image, dim=0) + + +class FastRCNNOutputLayers(nn.Module): + """ + Two linear layers for predicting Fast R-CNN outputs: + + 1. proposal-to-detection box regression deltas + 2. 
classification scores + """ + + @configurable + def __init__( + self, + input_shape: ShapeSpec, + *, + box2box_transform, + num_classes: int, + test_score_thresh: float = 0.0, + test_nms_thresh: float = 0.5, + test_topk_per_image: int = 100, + cls_agnostic_bbox_reg: bool = False, + smooth_l1_beta: float = 0.0, + box_reg_loss_type: str = "smooth_l1", + loss_weight: Union[float, Dict[str, float]] = 1.0, + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature to this module + box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): + num_classes (int): number of foreground classes + test_score_thresh (float): threshold to filter predictions results. + test_nms_thresh (float): NMS threshold for prediction results. + test_topk_per_image (int): number of top predictions to produce per image. + cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression + smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if + `box_reg_loss_type` is "smooth_l1" + box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou" + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all losses, or a dict of individual weightings. Valid dict keys are: + * "loss_cls": applied to classification loss + * "loss_box_reg": applied to box regression loss + """ + super().__init__() + if isinstance(input_shape, int): # some backward compatibility + input_shape = ShapeSpec(channels=input_shape) + self.num_classes = num_classes + input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) + # prediction layer for num_classes foreground classes and one background class (hence + 1) + self.cls_score = nn.Linear(input_size, num_classes + 1) + num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes + box_dim = len(box2box_transform.weights) + self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim) + + nn.init.normal_(self.cls_score.weight, std=0.01) + nn.init.normal_(self.bbox_pred.weight, std=0.001) + for l in [self.cls_score, self.bbox_pred]: + nn.init.constant_(l.bias, 0) + + self.box2box_transform = box2box_transform + self.smooth_l1_beta = smooth_l1_beta + self.test_score_thresh = test_score_thresh + self.test_nms_thresh = test_nms_thresh + self.test_topk_per_image = test_topk_per_image + self.box_reg_loss_type = box_reg_loss_type + if isinstance(loss_weight, float): + loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight} + self.loss_weight = loss_weight + + @classmethod + def from_config(cls, cfg, input_shape): + return { + "input_shape": input_shape, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), + # fmt: off + "num_classes" : cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "cls_agnostic_bbox_reg" : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, + "smooth_l1_beta" : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, + "test_score_thresh" : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, + "test_nms_thresh" : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + "test_topk_per_image" : cfg.TEST.DETECTIONS_PER_IMAGE, + "box_reg_loss_type" : cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE, + "loss_weight" : {"loss_box_reg": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT}, + # fmt: on + } + + def forward(self, x): + """ + Args: + x: per-region features of shape (N, ...) for N bounding boxes to predict. + + Returns: + (Tensor, Tensor): + First tensor: shape (N,K+1), scores for each of the N box. 
Each row contains the + scores for K object categories and 1 background class. + + Second tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4), + or (N,4) for class-agnostic regression. + """ + if x.dim() > 2: + x = torch.flatten(x, start_dim=1) + scores = self.cls_score(x) + proposal_deltas = self.bbox_pred(x) + return scores, proposal_deltas + + def losses(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``, + ``gt_classes`` are expected. + + Returns: + Dict[str, Tensor]: dict of losses + """ + scores, proposal_deltas = predictions + + # parse classification outputs + gt_classes = ( + cat([p.gt_classes for p in proposals], dim=0) if len(proposals) else torch.empty(0) + ) + _log_classification_stats(scores, gt_classes) + + # parse box regression outputs + if len(proposals): + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) # Nx4 + assert not proposal_boxes.requires_grad, "Proposals should not require gradients!" + # If "gt_boxes" does not exist, the proposals must be all negative and + # should not be included in regression loss computation. + # Here we just use proposal_boxes as an arbitrary placeholder because its + # value won't be used in self.box_reg_loss(). + gt_boxes = cat( + [(p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes).tensor for p in proposals], + dim=0, + ) + else: + proposal_boxes = gt_boxes = torch.empty((0, 4), device=proposal_deltas.device) + + losses = { + "loss_cls": cross_entropy(scores, gt_classes, reduction="mean"), + "loss_box_reg": self.box_reg_loss( + proposal_boxes, gt_boxes, proposal_deltas, gt_classes + ), + } + return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + + def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes): + """ + Args: + All boxes are tensors with the same shape Rx(4 or 5). + gt_classes is a long tensor of shape R, the gt class label of each proposal. + R shall be the number of proposals. + """ + box_dim = proposal_boxes.shape[1] # 4 or 5 + # Regression loss is only computed for foreground proposals (those matched to a GT) + fg_inds = nonzero_tuple((gt_classes >= 0) & (gt_classes < self.num_classes))[0] + if pred_deltas.shape[1] == box_dim: # cls-agnostic regression + fg_pred_deltas = pred_deltas[fg_inds] + else: + fg_pred_deltas = pred_deltas.view(-1, self.num_classes, box_dim)[ + fg_inds, gt_classes[fg_inds] + ] + + if self.box_reg_loss_type == "smooth_l1": + gt_pred_deltas = self.box2box_transform.get_deltas( + proposal_boxes[fg_inds], + gt_boxes[fg_inds], + ) + loss_box_reg = smooth_l1_loss( + fg_pred_deltas, gt_pred_deltas, self.smooth_l1_beta, reduction="sum" + ) + elif self.box_reg_loss_type == "giou": + fg_pred_boxes = self.box2box_transform.apply_deltas( + fg_pred_deltas, proposal_boxes[fg_inds] + ) + loss_box_reg = giou_loss(fg_pred_boxes, gt_boxes[fg_inds], reduction="sum") + else: + raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'") + # The reg loss is normalized using the total number of regions (R), not the number + # of foreground regions even though the box regression loss is only defined on + # foreground regions. Why? Because doing so gives equal training influence to + # each foreground example. 
To see how, consider two different minibatches: + # (1) Contains a single foreground region + # (2) Contains 100 foreground regions + # If we normalize by the number of foreground regions, the single example in + # minibatch (1) will be given 100 times as much influence as each foreground + # example in minibatch (2). Normalizing by the total number of regions, R, + # means that the single example in minibatch (1) and each of the 100 examples + # in minibatch (2) are given equal influence. + return loss_box_reg / max(gt_classes.numel(), 1.0) # return 0 if empty + + def inference(self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Instances]: same as `fast_rcnn_inference`. + list[Tensor]: same as `fast_rcnn_inference`. + """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + return fast_rcnn_inference( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + def predict_boxes_for_gt_classes(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_classes`` are expected. + + Returns: + list[Tensor]: + A list of Tensors of predicted boxes for GT classes in case of + class-specific box head. Element i of the list has shape (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + scores, proposal_deltas = predictions + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) + N, B = proposal_boxes.shape + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, proposal_boxes + ) # Nx(KxB) + + K = predict_boxes.shape[1] // B + if K > 1: + gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0) + # Some proposals are ignored or have a background class. Their gt_classes + # cannot be used as index. + gt_classes = gt_classes.clamp_(0, K - 1) + + predict_boxes = predict_boxes.view(N, K, B)[ + torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes + ] + num_prop_per_image = [len(p) for p in proposals] + return predict_boxes.split(num_prop_per_image) + + def predict_boxes( + self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] + ): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Tensor]: + A list of Tensors of predicted class-specific or class-agnostic boxes + for each image. 
Element i has shape (Ri, K * B) or (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + _, proposal_deltas = predictions + num_prop_per_image = [len(p) for p in proposals] + proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, + proposal_boxes, + ) # Nx(KxB) + return predict_boxes.split(num_prop_per_image) + + def predict_probs( + self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] + ): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. + + Returns: + list[Tensor]: + A list of Tensors of predicted class probabilities for each image. + Element i has shape (Ri, K + 1), where Ri is the number of proposals for image i. + """ + scores, _ = predictions + num_inst_per_image = [len(p) for p in proposals] + probs = F.softmax(scores, dim=-1) + return probs.split(num_inst_per_image, dim=0) diff --git a/src/sts/detectron2/modeling/roi_heads/keypoint_head.py b/src/sts/detectron2/modeling/roi_heads/keypoint_head.py new file mode 100644 index 0000000000000000000000000000000000000000..e0acc138e72fcb188e4ffb3d156358b8ca59babf --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/keypoint_head.py @@ -0,0 +1,272 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ConvTranspose2d, cat, interpolate +from detectron2.structures import Instances, heatmaps_to_keypoints +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +_TOTAL_SKIPPED = 0 + + +__all__ = [ + "ROI_KEYPOINT_HEAD_REGISTRY", + "build_keypoint_head", + "BaseKeypointRCNNHead", + "KRCNNConvDeconvUpsampleHead", +] + + +ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") +ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ +Registry for keypoint heads, which make keypoint predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def build_keypoint_head(cfg, input_shape): + """ + Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME + return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): + """ + Arguments: + pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number + of instances in the batch, K is the number of keypoints, and S is the side length + of the keypoint heatmap. The values are spatial logits. + instances (list[Instances]): A list of M Instances, where M is the batch size. + These instances are predictions from the model + that are in 1:1 correspondence with pred_keypoint_logits. + Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` + instance. + normalizer (float): Normalize the loss by this amount. + If not specified, we normalize by the number of visible keypoints in the minibatch. + + Returns a scalar tensor containing the loss. 
+ """ + heatmaps = [] + valid = [] + + keypoint_side_len = pred_keypoint_logits.shape[2] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + keypoints = instances_per_image.gt_keypoints + heatmaps_per_image, valid_per_image = keypoints.to_heatmap( + instances_per_image.proposal_boxes.tensor, keypoint_side_len + ) + heatmaps.append(heatmaps_per_image.view(-1)) + valid.append(valid_per_image.view(-1)) + + if len(heatmaps): + keypoint_targets = cat(heatmaps, dim=0) + valid = cat(valid, dim=0).to(dtype=torch.uint8) + valid = torch.nonzero(valid).squeeze(1) + + # torch.mean (in binary_cross_entropy_with_logits) doesn't + # accept empty tensors, so handle it separately + if len(heatmaps) == 0 or valid.numel() == 0: + global _TOTAL_SKIPPED + _TOTAL_SKIPPED += 1 + storage = get_event_storage() + storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) + return pred_keypoint_logits.sum() * 0 + + N, K, H, W = pred_keypoint_logits.shape + pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) + + keypoint_loss = F.cross_entropy( + pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" + ) + + # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch + if normalizer is None: + normalizer = valid.numel() + keypoint_loss /= normalizer + + return keypoint_loss + + +def keypoint_rcnn_inference(pred_keypoint_logits: torch.Tensor, pred_instances: List[Instances]): + """ + Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) + and add it to the `pred_instances` as a `pred_keypoints` field. + + Args: + pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number + of instances in the batch, K is the number of keypoints, and S is the side length of + the keypoint heatmap. The values are spatial logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images. + + Returns: + None. Each element in pred_instances will contain extra "pred_keypoints" and + "pred_keypoint_heatmaps" fields. "pred_keypoints" is a tensor of shape + (#instance, K, 3) where the last dimension corresponds to (x, y, score). + The scores are larger than 0. "pred_keypoint_heatmaps" contains the raw + keypoint logits as passed to this function. + """ + # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) + bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) + + pred_keypoint_logits = pred_keypoint_logits.detach() + keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits, bboxes_flat.detach()) + num_instances_per_image = [len(i) for i in pred_instances] + keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) + heatmap_results = pred_keypoint_logits.split(num_instances_per_image, dim=0) + + for keypoint_results_per_image, heatmap_results_per_image, instances_per_image in zip( + keypoint_results, heatmap_results, pred_instances + ): + # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) + # heatmap_results_per_image is (num instances)x(num keypoints)x(side)x(side) + instances_per_image.pred_keypoints = keypoint_results_per_image + instances_per_image.pred_keypoint_heatmaps = heatmap_results_per_image + + +class BaseKeypointRCNNHead(nn.Module): + """ + Implement the basic Keypoint R-CNN losses and inference logic described in + Sec. 5 of :paper:`Mask R-CNN`. 
+ """ + + @configurable + def __init__(self, *, num_keypoints, loss_weight=1.0, loss_normalizer=1.0): + """ + NOTE: this interface is experimental. + + Args: + num_keypoints (int): number of keypoints to predict + loss_weight (float): weight to multiple on the keypoint loss + loss_normalizer (float or str): + If float, divide the loss by `loss_normalizer * #images`. + If 'visible', the loss is normalized by the total number of + visible keypoints across images. + """ + super().__init__() + self.num_keypoints = num_keypoints + self.loss_weight = loss_weight + assert loss_normalizer == "visible" or isinstance(loss_normalizer, float), loss_normalizer + self.loss_normalizer = loss_normalizer + + @classmethod + def from_config(cls, cfg, input_shape): + ret = { + "loss_weight": cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT, + "num_keypoints": cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS, + } + normalize_by_visible = ( + cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS + ) # noqa + if not normalize_by_visible: + batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE + positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION + ret["loss_normalizer"] = ( + ret["num_keypoints"] * batch_size_per_image * positive_sample_fraction + ) + else: + ret["loss_normalizer"] = "visible" + return ret + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input 4D region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses if in training. The predicted "instances" if in inference. + """ + x = self.layers(x) + if self.training: + num_images = len(instances) + normalizer = ( + None if self.loss_normalizer == "visible" else num_images * self.loss_normalizer + ) + return { + "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) + * self.loss_weight + } + else: + keypoint_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from regional input features. + """ + raise NotImplementedError + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_KEYPOINT_HEAD_REGISTRY.register() +class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead, nn.Sequential): + """ + A standard keypoint head containing a series of 3x3 convs, followed by + a transpose convolution and bilinear interpolation for upsampling. + It is described in Sec. 5 of :paper:`Mask R-CNN`. + """ + + @configurable + def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature + conv_dims: an iterable of output channel counts for each conv in the head + e.g. (512, 512, 512) for three convs outputting 512 channels. 
+ """ + super().__init__(num_keypoints=num_keypoints, **kwargs) + + # default up_scale to 2.0 (this can be made an option) + up_scale = 2.0 + in_channels = input_shape.channels + + for idx, layer_channels in enumerate(conv_dims, 1): + module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) + self.add_module("conv_fcn{}".format(idx), module) + self.add_module("conv_fcn_relu{}".format(idx), nn.ReLU()) + in_channels = layer_channels + + deconv_kernel = 4 + self.score_lowres = ConvTranspose2d( + in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 + ) + self.up_scale = up_scale + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret["input_shape"] = input_shape + ret["conv_dims"] = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS + return ret + + def layers(self, x): + for layer in self: + x = layer(x) + x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) + return x diff --git a/src/sts/detectron2/modeling/roi_heads/mask_head.py b/src/sts/detectron2/modeling/roi_heads/mask_head.py new file mode 100644 index 0000000000000000000000000000000000000000..5ac5c4b9aaa34653d6c50e512a5a4300da450c7f --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/mask_head.py @@ -0,0 +1,292 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm +from detectron2.structures import Instances +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +__all__ = [ + "BaseMaskRCNNHead", + "MaskRCNNConvUpsampleHead", + "build_mask_head", + "ROI_MASK_HEAD_REGISTRY", +] + + +ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") +ROI_MASK_HEAD_REGISTRY.__doc__ = """ +Registry for mask heads, which predicts instance masks given +per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +@torch.jit.unused +def mask_rcnn_loss(pred_mask_logits: torch.Tensor, instances: List[Instances], vis_period: int = 0): + """ + Compute the mask prediction loss defined in the Mask R-CNN paper. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. These instances are in 1:1 + correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, + ...) associated with each instance are stored in fields. + vis_period (int): the period (in steps) to dump visualization. + + Returns: + mask_loss (Tensor): A scalar tensor containing the loss. 
+ """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + total_num_masks = pred_mask_logits.size(0) + mask_side_len = pred_mask_logits.size(2) + assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" + + gt_classes = [] + gt_masks = [] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + if not cls_agnostic_mask: + gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) + gt_classes.append(gt_classes_per_image) + + gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( + instances_per_image.proposal_boxes.tensor, mask_side_len + ).to(device=pred_mask_logits.device) + # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len + gt_masks.append(gt_masks_per_image) + + if len(gt_masks) == 0: + return pred_mask_logits.sum() * 0 + + gt_masks = cat(gt_masks, dim=0) + + if cls_agnostic_mask: + pred_mask_logits = pred_mask_logits[:, 0] + else: + indices = torch.arange(total_num_masks) + gt_classes = cat(gt_classes, dim=0) + pred_mask_logits = pred_mask_logits[indices, gt_classes] + + if gt_masks.dtype == torch.bool: + gt_masks_bool = gt_masks + else: + # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) + gt_masks_bool = gt_masks > 0.5 + gt_masks = gt_masks.to(dtype=torch.float32) + + # Log the training accuracy (using gt classes and 0.5 threshold) + mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool + mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) + num_positive = gt_masks_bool.sum().item() + false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( + gt_masks_bool.numel() - num_positive, 1.0 + ) + false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) + + storage = get_event_storage() + storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) + storage.put_scalar("mask_rcnn/false_positive", false_positive) + storage.put_scalar("mask_rcnn/false_negative", false_negative) + if vis_period > 0 and storage.iter % vis_period == 0: + pred_masks = pred_mask_logits.sigmoid() + vis_masks = torch.cat([pred_masks, gt_masks], axis=2) + name = "Left: mask prediction; Right: mask GT" + for idx, vis_mask in enumerate(vis_masks): + vis_mask = torch.stack([vis_mask] * 3, axis=0) + storage.put_image(name + f" ({idx})", vis_mask) + + mask_loss = F.binary_cross_entropy_with_logits(pred_mask_logits, gt_masks, reduction="mean") + return mask_loss + + +def mask_rcnn_inference(pred_mask_logits: torch.Tensor, pred_instances: List[Instances]): + """ + Convert pred_mask_logits to estimated foreground probability masks while also + extracting only the masks for the predicted classes in pred_instances. For each + predicted box, the mask of the same class is attached to the instance by adding a + new "pred_masks" field to pred_instances. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. Each Instances must have field "pred_classes". + + Returns: + None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, + Wmask) for predicted class. 
Note that the masks are returned as a soft (non-quantized) + masks the resolution predicted by the network; post-processing steps, such as resizing + the predicted masks to the original image resolution and/or binarizing them, is left + to the caller. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + + if cls_agnostic_mask: + mask_probs_pred = pred_mask_logits.sigmoid() + else: + # Select masks corresponding to the predicted classes + num_masks = pred_mask_logits.shape[0] + class_pred = cat([i.pred_classes for i in pred_instances]) + indices = torch.arange(num_masks, device=class_pred.device) + mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() + # mask_probs_pred.shape: (B, 1, Hmask, Wmask) + + num_boxes_per_image = [len(i) for i in pred_instances] + mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) + + for prob, instances in zip(mask_probs_pred, pred_instances): + instances.pred_masks = prob # (1, Hmask, Wmask) + + +class BaseMaskRCNNHead(nn.Module): + """ + Implement the basic Mask R-CNN losses and inference logic described in :paper:`Mask R-CNN` + """ + + @configurable + def __init__(self, *, loss_weight: float = 1.0, vis_period: int = 0): + """ + NOTE: this interface is experimental. + + Args: + loss_weight (float): multiplier of the loss + vis_period (int): visualization period + """ + super().__init__() + self.vis_period = vis_period + self.loss_weight = loss_weight + + @classmethod + def from_config(cls, cfg, input_shape): + return {"vis_period": cfg.VIS_PERIOD} + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses in training. The predicted "instances" in inference. + """ + x = self.layers(x) + if self.training: + return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period) * self.loss_weight} + else: + mask_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from input features. + """ + raise NotImplementedError + + +# To get torchscript support, we make the head a subclass of `nn.Sequential`. +# Therefore, to add new layers in this head class, please make sure they are +# added in the order they will be used in forward(). +@ROI_MASK_HEAD_REGISTRY.register() +class MaskRCNNConvUpsampleHead(BaseMaskRCNNHead, nn.Sequential): + """ + A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). + Predictions are made with a final 1x1 conv layer. + """ + + @configurable + def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature + num_classes (int): the number of foreground classes (i.e. background is not + included). 1 if using class agnostic prediction. + conv_dims (list[int]): a list of N>0 integers representing the output dimensions + of N-1 conv layers and the last upsample layer. + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. 
+ """ + super().__init__(**kwargs) + assert len(conv_dims) >= 1, "conv_dims have to be non-empty!" + + self.conv_norm_relus = [] + + cur_channels = input_shape.channels + for k, conv_dim in enumerate(conv_dims[:-1]): + conv = Conv2d( + cur_channels, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=nn.ReLU(), + ) + self.add_module("mask_fcn{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + cur_channels = conv_dim + + self.deconv = ConvTranspose2d( + cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0 + ) + self.add_module("deconv_relu", nn.ReLU()) + cur_channels = conv_dims[-1] + + self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0) + + for layer in self.conv_norm_relus + [self.deconv]: + weight_init.c2_msra_fill(layer) + # use normal distribution initialization for mask prediction layer + nn.init.normal_(self.predictor.weight, std=0.001) + if self.predictor.bias is not None: + nn.init.constant_(self.predictor.bias, 0) + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM + num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV + ret.update( + conv_dims=[conv_dim] * (num_conv + 1), # +1 for ConvTranspose + conv_norm=cfg.MODEL.ROI_MASK_HEAD.NORM, + input_shape=input_shape, + ) + if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK: + ret["num_classes"] = 1 + else: + ret["num_classes"] = cfg.MODEL.ROI_HEADS.NUM_CLASSES + return ret + + def layers(self, x): + for layer in self: + x = layer(x) + return x + + +def build_mask_head(cfg, input_shape): + """ + Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_MASK_HEAD.NAME + return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/src/sts/detectron2/modeling/roi_heads/roi_heads.py b/src/sts/detectron2/modeling/roi_heads/roi_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..64f5e7510ee0509f5db53d022d982380ce6cf886 --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/roi_heads.py @@ -0,0 +1,870 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import inspect +import logging +import numpy as np +from typing import Dict, List, Optional, Tuple +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, nonzero_tuple +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +from ..backbone.resnet import BottleneckBlock, ResNet +from ..matcher import Matcher +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from ..sampling import subsample_labels +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .keypoint_head import build_keypoint_head +from .mask_head import build_mask_head + +ROI_HEADS_REGISTRY = Registry("ROI_HEADS") +ROI_HEADS_REGISTRY.__doc__ = """ +Registry for ROI heads in a generalized R-CNN model. +ROIHeads take feature maps and region proposals, and +perform per-region computation. + +The registered object will be called with `obj(cfg, input_shape)`. +The call is expected to return an :class:`ROIHeads`. +""" + +logger = logging.getLogger(__name__) + + +def build_roi_heads(cfg, input_shape): + """ + Build ROIHeads defined by `cfg.MODEL.ROI_HEADS.NAME`. 
+ """ + name = cfg.MODEL.ROI_HEADS.NAME + return ROI_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +def select_foreground_proposals( + proposals: List[Instances], bg_label: int +) -> Tuple[List[Instances], List[torch.Tensor]]: + """ + Given a list of N Instances (for N images), each containing a `gt_classes` field, + return a list of Instances that contain only instances with `gt_classes != -1 && + gt_classes != bg_label`. + + Args: + proposals (list[Instances]): A list of N Instances, where N is the number of + images in the batch. + bg_label: label index of background class. + + Returns: + list[Instances]: N Instances, each contains only the selected foreground instances. + list[Tensor]: N boolean vector, correspond to the selection mask of + each Instances object. True for selected instances. + """ + assert isinstance(proposals, (list, tuple)) + assert isinstance(proposals[0], Instances) + assert proposals[0].has("gt_classes") + fg_proposals = [] + fg_selection_masks = [] + for proposals_per_image in proposals: + gt_classes = proposals_per_image.gt_classes + fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) + fg_idxs = fg_selection_mask.nonzero().squeeze(1) + fg_proposals.append(proposals_per_image[fg_idxs]) + fg_selection_masks.append(fg_selection_mask) + return fg_proposals, fg_selection_masks + + +def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: + """ + Args: + proposals (list[Instances]): a list of N Instances, where N is the + number of images. + + Returns: + proposals: only contains proposals with at least one visible keypoint. + + Note that this is still slightly different from Detectron. + In Detectron, proposals for training keypoint head are re-sampled from + all the proposals with IOU>threshold & >=1 visible keypoint. + + Here, the proposals are first sampled from all proposals with + IOU>threshold, then proposals with no visible keypoint are filtered out. + This strategy seems to make no difference on Detectron and is easier to implement. + """ + ret = [] + all_num_fg = [] + for proposals_per_image in proposals: + # If empty/unannotated image (hard negatives), skip filtering for train + if len(proposals_per_image) == 0: + ret.append(proposals_per_image) + continue + gt_keypoints = proposals_per_image.gt_keypoints.tensor + # #fg x K x 3 + vis_mask = gt_keypoints[:, :, 2] >= 1 + xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] + proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 + kp_in_box = ( + (xs >= proposal_boxes[:, :, 0]) + & (xs <= proposal_boxes[:, :, 2]) + & (ys >= proposal_boxes[:, :, 1]) + & (ys <= proposal_boxes[:, :, 3]) + ) + selection = (kp_in_box & vis_mask).any(dim=1) + selection_idxs = nonzero_tuple(selection)[0] + all_num_fg.append(selection_idxs.numel()) + ret.append(proposals_per_image[selection_idxs]) + + storage = get_event_storage() + storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) + return ret + + +class ROIHeads(torch.nn.Module): + """ + ROIHeads perform all per-region computation in an R-CNN. + + It typically contains logic to + + 1. (in training only) match proposals with ground truth and sample them + 2. crop the regions and extract per-region features using proposals + 3. make per-region predictions with different heads + + It can have many variants, implemented as subclasses of this class. + This base class contains the logic to match/sample proposals. 
+ But it is not necessary to inherit this class if the sampling logic is not needed. + """ + + @configurable + def __init__( + self, + *, + num_classes, + batch_size_per_image, + positive_fraction, + proposal_matcher, + proposal_append_gt=True + ): + """ + NOTE: this interface is experimental. + + Args: + num_classes (int): number of foreground classes (i.e. background is not included) + batch_size_per_image (int): number of proposals to sample for training + positive_fraction (float): fraction of positive (foreground) proposals + to sample for training. + proposal_matcher (Matcher): matcher that matches proposals and ground truth + proposal_append_gt (bool): whether to include ground truth as proposals as well + """ + super().__init__() + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + self.num_classes = num_classes + self.proposal_matcher = proposal_matcher + self.proposal_append_gt = proposal_append_gt + + @classmethod + def from_config(cls, cfg): + return { + "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION, + "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT, + # Matcher to assign box proposals to gt boxes + "proposal_matcher": Matcher( + cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, + cfg.MODEL.ROI_HEADS.IOU_LABELS, + allow_low_quality_matches=False, + ), + } + + def _sample_proposals( + self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor, gt_classes: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Based on the matching between N proposals and M groundtruth, + sample the proposals and set their classification labels. + + Args: + matched_idxs (Tensor): a vector of length N, each is the best-matched + gt index in [0, M) for each proposal. + matched_labels (Tensor): a vector of length N, the matcher's label + (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. + gt_classes (Tensor): a vector of length M. + + Returns: + Tensor: a vector of indices of sampled proposals. Each is in [0, N). + Tensor: a vector of the same length, the classification label for + each sampled proposal. Each sample is labeled as either a category in + [0, num_classes) or the background (num_classes). + """ + has_gt = gt_classes.numel() > 0 + # Get the corresponding GT for each proposal + if has_gt: + gt_classes = gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[matched_labels == 0] = self.num_classes + # Label ignore proposals (-1 label) + gt_classes[matched_labels == -1] = -1 + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + + sampled_fg_idxs, sampled_bg_idxs = subsample_labels( + gt_classes, self.batch_size_per_image, self.positive_fraction, self.num_classes + ) + + sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) + return sampled_idxs, gt_classes[sampled_idxs] + + @torch.no_grad() + def label_and_sample_proposals( + self, proposals: List[Instances], targets: List[Instances] + ) -> List[Instances]: + """ + Prepare some proposals to be used to train the ROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth + boxes, with a fraction of positives that is no larger than + ``self.positive_fraction``. 
+ + Args: + See :meth:`ROIHeads.forward` + + Returns: + list[Instances]: + length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + + - proposal_boxes: the proposal boxes + - gt_boxes: the ground-truth box that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + + Other fields such as "gt_classes", "gt_masks", that's included in `targets`. + """ + gt_boxes = [x.gt_boxes for x in targets] + # Augment proposals with ground-truth boxes. + # In the case of learned proposals (e.g., RPN), when training starts + # the proposals will be low quality due to random initialization. + # It's possible that none of these initial + # proposals have high enough overlap with the gt objects to be used + # as positive examples for the second stage components (box head, + # cls head, mask head). Adding the gt boxes to the set of proposals + # ensures that the second stage components will have some positive + # examples from the start of training. For RPN, this augmentation improves + # convergence and empirically improves box AP on COCO by about 0.5 + # points (under one tested configuration). + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(gt_boxes, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + # Set target attributes of the sampled proposals: + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + # We index all the attributes of targets that start with "gt_" + # and have not been added to proposals yet (="gt_classes"). + # NOTE: here the indexing waste some compute, because heads + # like masks, keypoints, etc, will filter the proposals again, + # (by foreground/background, or number of keypoints in the image, etc) + # so we essentially index the data twice. + for (trg_name, trg_value) in targets_per_image.get_fields().items(): + if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): + proposals_per_image.set(trg_name, trg_value[sampled_targets]) + # If no GT is given in the image, we don't know what a dummy gt value can be. + # Therefore the returned proposals won't have any gt_* fields, except for a + # gt_classes full of background label. 
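+            # The counts collected below feed only the "roi_head/num_fg_samples"
+            # and "roi_head/num_bg_samples" scalars logged after this loop.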
+ + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + Args: + images (ImageList): + features (dict[str,Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + proposals (list[Instances]): length `N` list of `Instances`. The i-th + `Instances` contains object proposals for the i-th input image, + with fields "proposal_boxes" and "objectness_logits". + targets (list[Instances], optional): length `N` list of `Instances`. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. Specify `targets` during training only. + It may have the following fields: + + - gt_boxes: the bounding box of each instance. + - gt_classes: the label for each instance with a category ranging in [0, #class]. + - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. + - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. + + Returns: + list[Instances]: length `N` list of `Instances` containing the + detected instances. Returned during inference only; may be [] during training. + + dict[str->Tensor]: + mapping from a named loss to a tensor storing the loss. Used during training only. + """ + raise NotImplementedError() + + +@ROI_HEADS_REGISTRY.register() +class Res5ROIHeads(ROIHeads): + """ + The ROIHeads in a typical "C4" R-CNN model, where + the box and mask head share the cropping and + the per-region feature computation by a Res5 block. + See :paper:`ResNet` Appendix A. + """ + + @configurable + def __init__( + self, + *, + in_features: List[str], + pooler: ROIPooler, + res5: nn.Module, + box_predictor: nn.Module, + mask_head: Optional[nn.Module] = None, + **kwargs + ): + """ + NOTE: this interface is experimental. + + Args: + in_features (list[str]): list of backbone feature map names to use for + feature extraction + pooler (ROIPooler): pooler to extra region features from backbone + res5 (nn.Sequential): a CNN to compute per-region features, to be used by + ``box_predictor`` and ``mask_head``. Typically this is a "res5" + block from a ResNet. + box_predictor (nn.Module): make box predictions from the feature. + Should have the same interface as :class:`FastRCNNOutputLayers`. 
+ mask_head (nn.Module): transform features to make mask predictions + """ + super().__init__(**kwargs) + self.in_features = in_features + self.pooler = pooler + if isinstance(res5, (list, tuple)): + res5 = nn.Sequential(*res5) + self.res5 = res5 + self.box_predictor = box_predictor + self.mask_on = mask_head is not None + if self.mask_on: + self.mask_head = mask_head + + @classmethod + def from_config(cls, cfg, input_shape): + # fmt: off + ret = super().from_config(cfg) + in_features = ret["in_features"] = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + pooler_scales = (1.0 / input_shape[in_features[0]].stride, ) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + mask_on = cfg.MODEL.MASK_ON + # fmt: on + assert not cfg.MODEL.KEYPOINT_ON + assert len(in_features) == 1 + + ret["pooler"] = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + + # Compatbility with old moco code. Might be useful. + # See notes in StandardROIHeads.from_config + if not inspect.ismethod(cls._build_res5_block): + logger.warning( + "The behavior of _build_res5_block may change. " + "Please do not depend on private methods." + ) + cls._build_res5_block = classmethod(cls._build_res5_block) + + ret["res5"], out_channels = cls._build_res5_block(cfg) + ret["box_predictor"] = FastRCNNOutputLayers( + cfg, ShapeSpec(channels=out_channels, height=1, width=1) + ) + + if mask_on: + ret["mask_head"] = build_mask_head( + cfg, + ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution), + ) + return ret + + @classmethod + def _build_res5_block(cls, cfg): + # fmt: off + stage_channel_factor = 2 ** 3 # res5 is 8x res2 + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group * stage_channel_factor + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + norm = cfg.MODEL.RESNETS.NORM + assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ + "Deformable conv is not yet supported in res5 head." + # fmt: on + + blocks = ResNet.make_stage( + BottleneckBlock, + 3, + stride_per_block=[2, 1, 1], + in_channels=out_channels // 2, + bottleneck_channels=bottleneck_channels, + out_channels=out_channels, + num_groups=num_groups, + norm=norm, + stride_in_1x1=stride_in_1x1, + ) + return nn.Sequential(*blocks), out_channels + + def _shared_roi_transform(self, features, boxes): + x = self.pooler(features, boxes) + return self.res5(x) + + def forward(self, images, features, proposals, targets=None): + """ + See :meth:`ROIHeads.forward`. + """ + del images + + if self.training: + assert targets + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + proposal_boxes = [x.proposal_boxes for x in proposals] + box_features = self._shared_roi_transform( + [features[f] for f in self.in_features], proposal_boxes + ) + predictions = self.box_predictor(box_features.mean(dim=[2, 3])) + + if self.training: + del features + losses = self.box_predictor.losses(predictions, proposals) + if self.mask_on: + proposals, fg_selection_masks = select_foreground_proposals( + proposals, self.num_classes + ) + # Since the ROI feature transform is shared between boxes and masks, + # we don't need to recompute features. 
The mask loss is only defined + # on foreground proposals, so we need to select out the foreground + # features. + mask_features = box_features[torch.cat(fg_selection_masks, dim=0)] + del box_features + losses.update(self.mask_head(mask_features, proposals)) + return [], losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes(self, features, instances): + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + instances (Instances): + the same `Instances` object, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + if self.mask_on: + features = [features[f] for f in self.in_features] + x = self._shared_roi_transform(features, [x.pred_boxes for x in instances]) + return self.mask_head(x, instances) + else: + return instances + + +@ROI_HEADS_REGISTRY.register() +class StandardROIHeads(ROIHeads): + """ + It's "standard" in a sense that there is no ROI transform sharing + or feature sharing between tasks. + Each head independently processes the input features by each head's + own pooler and head. + + This class is used by most models, such as FPN and C5. + To implement more models, you can subclass it and implement a different + :meth:`forward()` or a head. + """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_head: nn.Module, + box_predictor: nn.Module, + mask_in_features: Optional[List[str]] = None, + mask_pooler: Optional[ROIPooler] = None, + mask_head: Optional[nn.Module] = None, + keypoint_in_features: Optional[List[str]] = None, + keypoint_pooler: Optional[ROIPooler] = None, + keypoint_head: Optional[nn.Module] = None, + train_on_pred_boxes: bool = False, + **kwargs + ): + """ + NOTE: this interface is experimental. + + Args: + box_in_features (list[str]): list of feature names to use for the box head. + box_pooler (ROIPooler): pooler to extra region features for box head + box_head (nn.Module): transform features to make box predictions + box_predictor (nn.Module): make box predictions from the feature. + Should have the same interface as :class:`FastRCNNOutputLayers`. + mask_in_features (list[str]): list of feature names to use for the mask + pooler or mask head. None if not using mask head. + mask_pooler (ROIPooler): pooler to extract region features from image features. + The mask head will then take region features to make predictions. + If None, the mask head will directly take the dict of image features + defined by `mask_in_features` + mask_head (nn.Module): transform features to make mask predictions + keypoint_in_features, keypoint_pooler, keypoint_head: similar to ``mask_*``. + train_on_pred_boxes (bool): whether to use proposal boxes or + predicted boxes from the box head to train other heads. 
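+
+        In practice these arguments are filled in by ``from_config`` and the
+        ``_init_*_head`` helpers below; e.g. ``box_in_features`` comes from
+        ``cfg.MODEL.ROI_HEADS.IN_FEATURES``, and the ``mask_*`` / ``keypoint_*``
+        arguments stay None when ``cfg.MODEL.MASK_ON`` / ``cfg.MODEL.KEYPOINT_ON``
+        are False.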
+ """ + super().__init__(**kwargs) + # keep self.in_features for backward compatibility + self.in_features = self.box_in_features = box_in_features + self.box_pooler = box_pooler + self.box_head = box_head + self.box_predictor = box_predictor + + self.mask_on = mask_in_features is not None + if self.mask_on: + self.mask_in_features = mask_in_features + self.mask_pooler = mask_pooler + self.mask_head = mask_head + + self.keypoint_on = keypoint_in_features is not None + if self.keypoint_on: + self.keypoint_in_features = keypoint_in_features + self.keypoint_pooler = keypoint_pooler + self.keypoint_head = keypoint_head + + self.train_on_pred_boxes = train_on_pred_boxes + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg) + ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES + # Subclasses that have not been updated to use from_config style construction + # may have overridden _init_*_head methods. In this case, those overridden methods + # will not be classmethods and we need to avoid trying to call them here. + # We test for this with ismethod which only returns True for bound methods of cls. + # Such subclasses will need to handle calling their overridden _init_*_head methods. + if inspect.ismethod(cls._init_box_head): + ret.update(cls._init_box_head(cfg, input_shape)) + if inspect.ismethod(cls._init_mask_head): + ret.update(cls._init_mask_head(cfg, input_shape)) + if inspect.ismethod(cls._init_keypoint_head): + ret.update(cls._init_keypoint_head(cfg, input_shape)) + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + # Here we split "box head" and "box predictor", which is mainly due to historical reasons. + # They are used together so the "box predictor" layers should be part of the "box head". + # New subclasses of ROIHeads do not need "box predictor"s. 
+ box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @classmethod + def _init_mask_head(cls, cfg, input_shape): + if not cfg.MODEL.MASK_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"mask_in_features": in_features} + ret["mask_pooler"] = ( + ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + if pooler_type + else None + ) + if pooler_type: + shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + else: + shape = {f: input_shape[f] for f in in_features} + ret["mask_head"] = build_mask_head(cfg, shape) + return ret + + @classmethod + def _init_keypoint_head(cls, cfg, input_shape): + if not cfg.MODEL.KEYPOINT_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) # noqa + sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"keypoint_in_features": in_features} + ret["keypoint_pooler"] = ( + ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + if pooler_type + else None + ) + if pooler_type: + shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + else: + shape = {f: input_shape[f] for f in in_features} + ret["keypoint_head"] = build_keypoint_head(cfg, shape) + return ret + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + See :class:`ROIHeads.forward`. + """ + del images + if self.training: + assert targets, "'targets' argument is required during training" + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + if self.training: + losses = self._forward_box(features, proposals) + # Usually the original proposals used by the box head are used by the mask, keypoint + # heads. But when `self.train_on_pred_boxes is True`, proposals will contain boxes + # predicted by the box head. + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + # During inference cascaded prediction is used: the mask and keypoints heads are only + # applied to the top scoring box detections. 
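+            # forward_with_given_boxes() reuses the boxes predicted above, so the
+            # mask/keypoint heads run once per kept detection rather than per proposal.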
+ pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> List[Instances]: + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + This is useful for downstream tasks where a box is known, but need to obtain + other attributes (outputs of other heads). + Test-time augmentation also uses this. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + list[Instances]: + the same `Instances` objects, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + instances = self._forward_mask(features, instances) + instances = self._forward_keypoint(features, instances) + return instances + + def _forward_box(self, features: Dict[str, torch.Tensor], proposals: List[Instances]): + """ + Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, + the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + + Returns: + In training, a dict of losses. + In inference, a list of `Instances`, the predicted instances. + """ + features = [features[f] for f in self.box_in_features] + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + box_features = self.box_head(box_features) + predictions = self.box_predictor(box_features) + del box_features + + if self.training: + losses = self.box_predictor.losses(predictions, proposals) + # proposals is modified in-place below, so losses must be computed first. + if self.train_on_pred_boxes: + with torch.no_grad(): + pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( + predictions, proposals + ) + for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): + proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) + return losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + return pred_instances + + def _forward_mask(self, features: Dict[str, torch.Tensor], instances: List[Instances]): + """ + Forward logic of the mask prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict masks. + In training, they can be the proposals. + In inference, they can be the boxes predicted by R-CNN box head. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_masks" and return it. + """ + if not self.mask_on: + return {} if self.training else instances + + if self.training: + # head is only trained on positive proposals. 
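+            # select_foreground_proposals() (defined above) keeps only proposals whose
+            # gt_classes is a foreground class, dropping background and ignored ones.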
+ instances, _ = select_foreground_proposals(instances, self.num_classes) + + if self.mask_pooler is not None: + features = [features[f] for f in self.mask_in_features] + boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] + features = self.mask_pooler(features, boxes) + else: + features = {f: features[f] for f in self.mask_in_features} + return self.mask_head(features, instances) + + def _forward_keypoint(self, features: Dict[str, torch.Tensor], instances: List[Instances]): + """ + Forward logic of the keypoint prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict keypoints. + In training, they can be the proposals. + In inference, they can be the boxes predicted by R-CNN box head. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_keypoints" and return it. + """ + if not self.keypoint_on: + return {} if self.training else instances + + if self.training: + # head is only trained on positive proposals with >=1 visible keypoints. + instances, _ = select_foreground_proposals(instances, self.num_classes) + instances = select_proposals_with_visible_keypoints(instances) + + if self.keypoint_pooler is not None: + features = [features[f] for f in self.keypoint_in_features] + boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] + features = self.keypoint_pooler(features, boxes) + else: + features = {f: features[f] for f in self.keypoint_in_features} + return self.keypoint_head(features, instances) diff --git a/src/sts/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/src/sts/detectron2/modeling/roi_heads/rotated_fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..186ae03ffd9c575cc6e065a2b06651c947b9953b --- /dev/null +++ b/src/sts/detectron2/modeling/roi_heads/rotated_fast_rcnn.py @@ -0,0 +1,271 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import numpy as np +import torch + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, batched_nms_rotated +from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransformRotated +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransformRotated`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). + + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. + + pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals + to detection box predictions. 
+ + gt_proposal_deltas: ground-truth rotated box2box transform deltas +""" + + +def fast_rcnn_inference_rotated( + boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image +): + """ + Call `fast_rcnn_inference_single_image_rotated` for all images. + + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 5) if doing + class-specific regression, or (Ri, 5) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. + This is compatible with the output of :meth:`FastRCNNOutputs.predict_boxes`. + scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputs.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. + kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image_rotated( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +def fast_rcnn_inference_single_image_rotated( + boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image +): + """ + Single-image inference. Return rotated bounding-box detection results by thresholding + on scores and applying rotated non-maximum suppression (Rotated NMS). + + Args: + Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference_rotated`, but for only one image. + """ + valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + B = 5 # box dimension + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // B + # Convert to Boxes to use the `clip` function ... + boxes = RotatedBoxes(boxes.reshape(-1, B)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B) # R x C x B + # Filter results based on detection scores + filter_mask = scores > score_thresh # R x K + # R' x 2. First column contains indices of the R predictions; + # Second column contains indices of classes. 
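+    # e.g. a row [i, k] below means prediction i passed score_thresh for class k;
+    # the same prediction may therefore appear once per qualifying class.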
+ filter_inds = filter_mask.nonzero() + if num_bbox_reg_classes == 1: + boxes = boxes[filter_inds[:, 0], 0] + else: + boxes = boxes[filter_mask] + scores = scores[filter_mask] + + # Apply per-class Rotated NMS + keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh) + if topk_per_image >= 0: + keep = keep[:topk_per_image] + boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] + + result = Instances(image_shape) + result.pred_boxes = RotatedBoxes(boxes) + result.scores = scores + result.pred_classes = filter_inds[:, 1] + + return result, filter_inds[:, 0] + + +class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers): + """ + Two linear layers for predicting Rotated Fast R-CNN outputs. + """ + + @classmethod + def from_config(cls, cfg, input_shape): + args = super().from_config(cfg, input_shape) + args["box2box_transform"] = Box2BoxTransformRotated( + weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS + ) + return args + + def inference(self, predictions, proposals): + """ + Returns: + list[Instances]: same as `fast_rcnn_inference_rotated`. + list[Tensor]: same as `fast_rcnn_inference_rotated`. + """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + + return fast_rcnn_inference_rotated( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + +@ROI_HEADS_REGISTRY.register() +class RROIHeads(StandardROIHeads): + """ + This class is used by Rotated Fast R-CNN to detect rotated boxes. + For now, it only supports box predictions but not mask or keypoints. + """ + + @configurable + def __init__(self, **kwargs): + """ + NOTE: this interface is experimental. + """ + super().__init__(**kwargs) + assert ( + not self.mask_on and not self.keypoint_on + ), "Mask/Keypoints not supported in Rotated ROIHeads." + assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!" + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + assert pooler_type in ["ROIAlignRotated"], pooler_type + # assume all channel counts are equal + in_channels = [input_shape[f].channels for f in in_features][0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + # This line is the only difference v.s. StandardROIHeads + box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @torch.no_grad() + def label_and_sample_proposals(self, proposals, targets): + """ + Prepare some proposals to be used to train the RROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, + with a fraction of positives that is no larger than `self.positive_sample_fraction. 
+ + Args: + See :meth:`StandardROIHeads.forward` + + Returns: + list[Instances]: length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + - proposal_boxes: the rotated proposal boxes + - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + - gt_classes: the ground-truth classification lable for each proposal + """ + gt_boxes = [x.gt_boxes for x in targets] + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(gt_boxes, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou_rotated( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets] + + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt diff --git a/src/sts/detectron2/modeling/sampling.py b/src/sts/detectron2/modeling/sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..a2d0f6648b349c5ea39fd29785b77c961a58fa22 --- /dev/null +++ b/src/sts/detectron2/modeling/sampling.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import torch + +from detectron2.layers import nonzero_tuple + +__all__ = ["subsample_labels"] + + +def subsample_labels( + labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int +): + """ + Return `num_samples` (or fewer, if not enough found) + random samples from `labels` which is a mixture of positives & negatives. + It will try to return as many positives as possible without + exceeding `positive_fraction * num_samples`, and then try to + fill the remaining slots with negatives. + + Args: + labels (Tensor): (N, ) label vector with values: + * -1: ignore + * bg_label: background ("negative") class + * otherwise: one or more foreground ("positive") classes + num_samples (int): The total number of labels with value >= 0 to return. + Values that are not sampled will be filled with -1 (ignore). + positive_fraction (float): The number of subsampled labels with values > 0 + is `min(num_positives, int(positive_fraction * num_samples))`. The number + of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. + In order words, if there are not enough positives, the sample is filled with + negatives. If there are also not enough negatives, then as many elements are + sampled as is possible. + bg_label (int): label index of background ("negative") class. 
+ + Returns: + pos_idx, neg_idx (Tensor): + 1D vector of indices. The total length of both is `num_samples` or fewer. + """ + positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] + negative = nonzero_tuple(labels == bg_label)[0] + + num_pos = int(num_samples * positive_fraction) + # protect against not enough positive examples + num_pos = min(positive.numel(), num_pos) + num_neg = num_samples - num_pos + # protect against not enough negative examples + num_neg = min(negative.numel(), num_neg) + + # randomly select positive and negative examples + perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + pos_idx = positive[perm1] + neg_idx = negative[perm2] + return pos_idx, neg_idx diff --git a/src/sts/detectron2/modeling/test_time_augmentation.py b/src/sts/detectron2/modeling/test_time_augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..373e6bf00a39c040ff1da49d6dcd39a54a0b69a7 --- /dev/null +++ b/src/sts/detectron2/modeling/test_time_augmentation.py @@ -0,0 +1,307 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import numpy as np +from contextlib import contextmanager +from itertools import count +from typing import List +import torch +from fvcore.transforms import HFlipTransform, NoOpTransform +from torch import nn +from torch.nn.parallel import DistributedDataParallel + +from detectron2.config import configurable +from detectron2.data.detection_utils import read_image +from detectron2.data.transforms import ( + RandomFlip, + ResizeShortestEdge, + ResizeTransform, + apply_augmentations, +) +from detectron2.structures import Boxes, Instances + +from .meta_arch import GeneralizedRCNN +from .postprocessing import detector_postprocess +from .roi_heads.fast_rcnn import fast_rcnn_inference_single_image + +__all__ = ["DatasetMapperTTA", "GeneralizedRCNNWithTTA"] + + +class DatasetMapperTTA: + """ + Implement test-time augmentation for detection data. + It is a callable which takes a dataset dict from a detection dataset, + and returns a list of dataset dicts where the images + are augmented from the input image by the transformations defined in the config. + This is used for test-time augmentation. + """ + + @configurable + def __init__(self, min_sizes: List[int], max_size: int, flip: bool): + """ + Args: + min_sizes: list of short-edge size to resize the image to + max_size: maximum height or width of resized images + flip: whether to apply flipping augmentation + """ + self.min_sizes = min_sizes + self.max_size = max_size + self.flip = flip + + @classmethod + def from_config(cls, cfg): + return { + "min_sizes": cfg.TEST.AUG.MIN_SIZES, + "max_size": cfg.TEST.AUG.MAX_SIZE, + "flip": cfg.TEST.AUG.FLIP, + } + + def __call__(self, dataset_dict): + """ + Args: + dict: a dict in standard model input format. See tutorials for details. + + Returns: + list[dict]: + a list of dicts, which contain augmented version of the input image. + The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. + Each dict has field "transforms" which is a TransformList, + containing the transforms that are used to generate this image. 
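+
+            For example (illustrative), ``min_sizes=(400, 500, 600)`` with
+            ``flip=True`` yields 6 dicts per input: one resize-only and one
+            resize+flip variant for each short-edge size.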
+ """ + numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy() + shape = numpy_image.shape + orig_shape = (dataset_dict["height"], dataset_dict["width"]) + if shape[:2] != orig_shape: + # It transforms the "original" image in the dataset to the input image + pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1]) + else: + pre_tfm = NoOpTransform() + + # Create all combinations of augmentations to use + aug_candidates = [] # each element is a list[Augmentation] + for min_size in self.min_sizes: + resize = ResizeShortestEdge(min_size, self.max_size) + aug_candidates.append([resize]) # resize only + if self.flip: + flip = RandomFlip(prob=1.0) + aug_candidates.append([resize, flip]) # resize + flip + + # Apply all the augmentations + ret = [] + for aug in aug_candidates: + new_image, tfms = apply_augmentations(aug, np.copy(numpy_image)) + torch_image = torch.from_numpy(np.ascontiguousarray(new_image.transpose(2, 0, 1))) + + dic = copy.deepcopy(dataset_dict) + dic["transforms"] = pre_tfm + tfms + dic["image"] = torch_image + ret.append(dic) + return ret + + +class GeneralizedRCNNWithTTA(nn.Module): + """ + A GeneralizedRCNN with test-time augmentation enabled. + Its :meth:`__call__` method has the same interface as :meth:`GeneralizedRCNN.forward`. + """ + + def __init__(self, cfg, model, tta_mapper=None, batch_size=3): + """ + Args: + cfg (CfgNode): + model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. + tta_mapper (callable): takes a dataset dict and returns a list of + augmented versions of the dataset dict. Defaults to + `DatasetMapperTTA(cfg)`. + batch_size (int): batch the augmented images into this batch size for inference. + """ + super().__init__() + if isinstance(model, DistributedDataParallel): + model = model.module + assert isinstance( + model, GeneralizedRCNN + ), "TTA is only supported on GeneralizedRCNN. Got a model of type {}".format(type(model)) + self.cfg = cfg.clone() + assert not self.cfg.MODEL.KEYPOINT_ON, "TTA for keypoint is not supported yet" + assert ( + not self.cfg.MODEL.LOAD_PROPOSALS + ), "TTA for pre-computed proposals is not supported yet" + + self.model = model + + if tta_mapper is None: + tta_mapper = DatasetMapperTTA(cfg) + self.tta_mapper = tta_mapper + self.batch_size = batch_size + + @contextmanager + def _turn_off_roi_heads(self, attrs): + """ + Open a context where some heads in `model.roi_heads` are temporarily turned off. + Args: + attr (list[str]): the attribute in `model.roi_heads` which can be used + to turn off a specific head, e.g., "mask_on", "keypoint_on". + """ + roi_heads = self.model.roi_heads + old = {} + for attr in attrs: + try: + old[attr] = getattr(roi_heads, attr) + except AttributeError: + # The head may not be implemented in certain ROIHeads + pass + + if len(old.keys()) == 0: + yield + else: + for attr in old.keys(): + setattr(roi_heads, attr, False) + yield + for attr in old.keys(): + setattr(roi_heads, attr, old[attr]) + + def _batch_inference(self, batched_inputs, detected_instances=None): + """ + Execute inference on a list of inputs, + using batch size = self.batch_size, instead of the length of the list. 
+ + Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` + """ + if detected_instances is None: + detected_instances = [None] * len(batched_inputs) + + outputs = [] + inputs, instances = [], [] + for idx, input, instance in zip(count(), batched_inputs, detected_instances): + inputs.append(input) + instances.append(instance) + if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: + outputs.extend( + self.model.inference( + inputs, + instances if instances[0] is not None else None, + do_postprocess=False, + ) + ) + inputs, instances = [], [] + return outputs + + def __call__(self, batched_inputs): + """ + Same input/output format as :meth:`GeneralizedRCNN.forward` + """ + + def _maybe_read_image(dataset_dict): + ret = copy.copy(dataset_dict) + if "image" not in ret: + image = read_image(ret.pop("file_name"), self.model.input_format) + image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW + ret["image"] = image + if "height" not in ret and "width" not in ret: + ret["height"] = image.shape[1] + ret["width"] = image.shape[2] + return ret + + return [self._inference_one_image(_maybe_read_image(x)) for x in batched_inputs] + + def _inference_one_image(self, input): + """ + Args: + input (dict): one dataset dict with "image" field being a CHW tensor + + Returns: + dict: one output dict + """ + orig_shape = (input["height"], input["width"]) + augmented_inputs, tfms = self._get_augmented_inputs(input) + # Detect boxes from all augmented versions + with self._turn_off_roi_heads(["mask_on", "keypoint_on"]): + # temporarily disable roi heads + all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms) + # merge all detected boxes to obtain final predictions for boxes + merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape) + + if self.cfg.MODEL.MASK_ON: + # Use the detected boxes to obtain masks + augmented_instances = self._rescale_detected_boxes( + augmented_inputs, merged_instances, tfms + ) + # run forward on the detected boxes + outputs = self._batch_inference(augmented_inputs, augmented_instances) + # Delete now useless variables to avoid being out of memory + del augmented_inputs, augmented_instances + # average the predictions + merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms) + merged_instances = detector_postprocess(merged_instances, *orig_shape) + return {"instances": merged_instances} + else: + return {"instances": merged_instances} + + def _get_augmented_inputs(self, input): + augmented_inputs = self.tta_mapper(input) + tfms = [x.pop("transforms") for x in augmented_inputs] + return augmented_inputs, tfms + + def _get_augmented_boxes(self, augmented_inputs, tfms): + # 1: forward with all augmented images + outputs = self._batch_inference(augmented_inputs) + # 2: union the results + all_boxes = [] + all_scores = [] + all_classes = [] + for output, tfm in zip(outputs, tfms): + # Need to inverse the transforms on boxes, to obtain results on original image + pred_boxes = output.pred_boxes.tensor + original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy()) + all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device)) + + all_scores.extend(output.scores) + all_classes.extend(output.pred_classes) + all_boxes = torch.cat(all_boxes, dim=0) + return all_boxes, all_scores, all_classes + + def _merge_detections(self, all_boxes, all_scores, all_classes, shape_hw): + # select from the union of all results + num_boxes = 
len(all_boxes) + num_classes = self.cfg.MODEL.ROI_HEADS.NUM_CLASSES + # +1 because fast_rcnn_inference expects background scores as well + all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) + for idx, cls, score in zip(count(), all_classes, all_scores): + all_scores_2d[idx, cls] = score + + merged_instances, _ = fast_rcnn_inference_single_image( + all_boxes, + all_scores_2d, + shape_hw, + 1e-8, + self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + self.cfg.TEST.DETECTIONS_PER_IMAGE, + ) + + return merged_instances + + def _rescale_detected_boxes(self, augmented_inputs, merged_instances, tfms): + augmented_instances = [] + for input, tfm in zip(augmented_inputs, tfms): + # Transform the target box to the augmented image's coordinate space + pred_boxes = merged_instances.pred_boxes.tensor.cpu().numpy() + pred_boxes = torch.from_numpy(tfm.apply_box(pred_boxes)) + + aug_instances = Instances( + image_size=input["image"].shape[1:3], + pred_boxes=Boxes(pred_boxes), + pred_classes=merged_instances.pred_classes, + scores=merged_instances.scores, + ) + augmented_instances.append(aug_instances) + return augmented_instances + + def _reduce_pred_masks(self, outputs, tfms): + # Should apply inverse transforms on masks. + # We assume only resize & flip are used. pred_masks is a scale-invariant + # representation, so we handle flip specially + for output, tfm in zip(outputs, tfms): + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + output.pred_masks = output.pred_masks.flip(dims=[3]) + all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) + avg_pred_masks = torch.mean(all_pred_masks, dim=0) + return avg_pred_masks diff --git a/src/sts/detectron2/projects/README.md b/src/sts/detectron2/projects/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95afe7ff8c8a9bd2f56621fcc3c1bdac11c256a9 --- /dev/null +++ b/src/sts/detectron2/projects/README.md @@ -0,0 +1,2 @@ + +Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here. diff --git a/src/sts/detectron2/projects/__init__.py b/src/sts/detectron2/projects/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f691fb71c3d939059090f668c25737dd297f6160 --- /dev/null +++ b/src/sts/detectron2/projects/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+import importlib +from pathlib import Path + +_PROJECTS = { + "point_rend": "PointRend", + "deeplab": "DeepLab", + "panoptic_deeplab": "Panoptic-DeepLab", +} +_PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects" + +if _PROJECT_ROOT.is_dir(): + # This is true only for in-place installation (pip install -e, setup.py develop), + # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 + + class _D2ProjectsFinder(importlib.abc.MetaPathFinder): + def find_spec(self, name, path, target=None): + if not name.startswith("detectron2.projects."): + return + project_name = name.split(".")[-1] + project_dir = _PROJECTS.get(project_name) + if not project_dir: + return + target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" + if not target_file.is_file(): + return + return importlib.util.spec_from_file_location(name, target_file) + + import sys + + sys.meta_path.append(_D2ProjectsFinder()) diff --git a/src/sts/detectron2/solver/__init__.py b/src/sts/detectron2/solver/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9a2dbd35bb24f0d4a979bc8f304142376d87e7ec --- /dev/null +++ b/src/sts/detectron2/solver/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params +from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR, LRMultiplier, WarmupParamScheduler + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/src/sts/detectron2/solver/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/solver/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e9d62fe4d750567e5c0872d679727a44976edc0 Binary files /dev/null and b/src/sts/detectron2/solver/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/solver/__pycache__/build.cpython-38.pyc b/src/sts/detectron2/solver/__pycache__/build.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e409b49019162e794a01d7ac12650ca71c55718 Binary files /dev/null and b/src/sts/detectron2/solver/__pycache__/build.cpython-38.pyc differ diff --git a/src/sts/detectron2/solver/__pycache__/lr_scheduler.cpython-38.pyc b/src/sts/detectron2/solver/__pycache__/lr_scheduler.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0485d8f902e0cd1a6d6fc89839849d696a76e627 Binary files /dev/null and b/src/sts/detectron2/solver/__pycache__/lr_scheduler.cpython-38.pyc differ diff --git a/src/sts/detectron2/solver/build.py b/src/sts/detectron2/solver/build.py new file mode 100644 index 0000000000000000000000000000000000000000..d8d1c0316ec703d4c9ca322a1eb1741a4751c6d5 --- /dev/null +++ b/src/sts/detectron2/solver/build.py @@ -0,0 +1,252 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
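+# Typical usage of this module, as a minimal illustrative sketch (assumes a populated detectron2
+# CfgNode `cfg` and a torch.nn.Module `model`; not a verbatim training loop):
+#
+#     from detectron2.solver import build_lr_scheduler, build_optimizer
+#     optimizer = build_optimizer(cfg, model)        # SGD with per-parameter lr/weight-decay groups
+#     scheduler = build_lr_scheduler(cfg, optimizer) # warmup + multi-step or cosine LR multiplier
+#     for _ in range(cfg.SOLVER.MAX_ITER):
+#         ...  # forward/backward and optimizer.step()
+#         scheduler.step()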
+import copy +import itertools +import logging +from enum import Enum +from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Type, Union +import torch +from fvcore.common.param_scheduler import CosineParamScheduler, MultiStepParamScheduler + +from detectron2.config import CfgNode + +from .lr_scheduler import LRMultiplier, WarmupParamScheduler + +_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]] +_GradientClipper = Callable[[_GradientClipperInput], None] + + +class GradientClipType(Enum): + VALUE = "value" + NORM = "norm" + + +def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper: + """ + Creates gradient clipping closure to clip by value or by norm, + according to the provided config. + """ + cfg = copy.deepcopy(cfg) + + def clip_grad_norm(p: _GradientClipperInput): + torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE) + + def clip_grad_value(p: _GradientClipperInput): + torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE) + + _GRADIENT_CLIP_TYPE_TO_CLIPPER = { + GradientClipType.VALUE: clip_grad_value, + GradientClipType.NORM: clip_grad_norm, + } + return _GRADIENT_CLIP_TYPE_TO_CLIPPER[GradientClipType(cfg.CLIP_TYPE)] + + +def _generate_optimizer_class_with_gradient_clipping( + optimizer: Type[torch.optim.Optimizer], + *, + per_param_clipper: Optional[_GradientClipper] = None, + global_clipper: Optional[_GradientClipper] = None, +) -> Type[torch.optim.Optimizer]: + """ + Dynamically creates a new type that inherits the type of a given instance + and overrides the `step` method to add gradient clipping + """ + assert ( + per_param_clipper is None or global_clipper is None + ), "Not allowed to use both per-parameter clipping and global clipping" + + def optimizer_wgc_step(self, closure=None): + if per_param_clipper is not None: + for group in self.param_groups: + for p in group["params"]: + per_param_clipper(p) + else: + # global clipper for future use with detr + # (https://github.com/facebookresearch/detr/pull/287) + all_params = itertools.chain(*[g["params"] for g in self.param_groups]) + global_clipper(all_params) + super(type(self), self).step(closure) + + OptimizerWithGradientClip = type( + optimizer.__name__ + "WithGradientClip", + (optimizer,), + {"step": optimizer_wgc_step}, + ) + return OptimizerWithGradientClip + + +def maybe_add_gradient_clipping( + cfg: CfgNode, optimizer: Type[torch.optim.Optimizer] +) -> Type[torch.optim.Optimizer]: + """ + If gradient clipping is enabled through config options, wraps the existing + optimizer type to become a new dynamically created class OptimizerWithGradientClip + that inherits the given optimizer and overrides the `step` method to + include gradient clipping. + + Args: + cfg: CfgNode, configuration options + optimizer: type. A subclass of torch.optim.Optimizer + + Return: + type: either the input `optimizer` (if gradient clipping is disabled), or + a subclass of it with gradient clipping included in the `step` method. 
+ """ + if not cfg.SOLVER.CLIP_GRADIENTS.ENABLED: + return optimizer + if isinstance(optimizer, torch.optim.Optimizer): + optimizer_type = type(optimizer) + else: + assert issubclass(optimizer, torch.optim.Optimizer), optimizer + optimizer_type = optimizer + + grad_clipper = _create_gradient_clipper(cfg.SOLVER.CLIP_GRADIENTS) + OptimizerWithGradientClip = _generate_optimizer_class_with_gradient_clipping( + optimizer_type, per_param_clipper=grad_clipper + ) + if isinstance(optimizer, torch.optim.Optimizer): + optimizer.__class__ = OptimizerWithGradientClip # a bit hacky, not recommended + return optimizer + else: + return OptimizerWithGradientClip + + +def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: + """ + Build an optimizer from config. + """ + params = get_default_optimizer_params( + model, + base_lr=cfg.SOLVER.BASE_LR, + weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM, + bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR, + weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS, + ) + return maybe_add_gradient_clipping(cfg, torch.optim.SGD)( + params, + lr=cfg.SOLVER.BASE_LR, + momentum=cfg.SOLVER.MOMENTUM, + nesterov=cfg.SOLVER.NESTEROV, + weight_decay=cfg.SOLVER.WEIGHT_DECAY, + ) + + +def get_default_optimizer_params( + model: torch.nn.Module, + base_lr: Optional[float] = None, + weight_decay: Optional[float] = None, + weight_decay_norm: Optional[float] = None, + bias_lr_factor: Optional[float] = 1.0, + weight_decay_bias: Optional[float] = None, + overrides: Optional[Dict[str, Dict[str, float]]] = None, +): + """ + Get default param list for optimizer, with support for a few types of + overrides. If not overrides needed, this is equivalent to `model.parameters()`. + + Args: + base_lr: lr for every group by default. Can be omitted to use the one in optimizer. + weight_decay: weight decay for every group by default. Can be omitted to use the one + in optimizer. + weight_decay_norm: override weight decay for params in normalization layers + bias_lr_factor: multiplier of lr for bias parameters. + weight_decay_bias: override weight decay for bias parameters + overrides: if not `None`, provides values for optimizer hyperparameters + (LR, weight decay) for module parameters with a given name; e.g. + ``{"embedding": {"lr": 0.01, "weight_decay": 0.1}}`` will set the LR and + weight decay values for all module parameters named `embedding`. + + For common detection models, ``weight_decay_norm`` is the only option + needed to be set. ``bias_lr_factor,weight_decay_bias`` are legacy settings + from Detectron1 that are not found useful. + + Example: + :: + torch.optim.SGD(get_default_optimizer_params(model, weight_decay_norm=0), + lr=0.01, weight_decay=1e-4, momentum=0.9) + """ + if overrides is None: + overrides = {} + defaults = {} + if base_lr is not None: + defaults["lr"] = base_lr + if weight_decay is not None: + defaults["weight_decay"] = weight_decay + bias_overrides = {} + if bias_lr_factor is not None and bias_lr_factor != 1.0: + # NOTE: unlike Detectron v1, we now by default make bias hyperparameters + # exactly the same as regular weights. 
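+ # Illustrative: with base_lr=0.01 and bias_lr_factor=2.0, the "bias" override built below
+ # gives every bias parameter lr=0.02 while its other hyperparameters keep the defaults.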
+ if base_lr is None: + raise ValueError("bias_lr_factor requires base_lr") + bias_overrides["lr"] = base_lr * bias_lr_factor + if weight_decay_bias is not None: + bias_overrides["weight_decay"] = weight_decay_bias + if len(bias_overrides): + if "bias" in overrides: + raise ValueError("Conflicting overrides for 'bias'") + overrides["bias"] = bias_overrides + + norm_module_types = ( + torch.nn.BatchNorm1d, + torch.nn.BatchNorm2d, + torch.nn.BatchNorm3d, + torch.nn.SyncBatchNorm, + # NaiveSyncBatchNorm inherits from BatchNorm2d + torch.nn.GroupNorm, + torch.nn.InstanceNorm1d, + torch.nn.InstanceNorm2d, + torch.nn.InstanceNorm3d, + torch.nn.LayerNorm, + torch.nn.LocalResponseNorm, + ) + params: List[Dict[str, Any]] = [] + memo: Set[torch.nn.parameter.Parameter] = set() + for module in model.modules(): + for module_param_name, value in module.named_parameters(recurse=False): + if not value.requires_grad: + continue + # Avoid duplicating parameters + if value in memo: + continue + memo.add(value) + + hyperparams = copy.copy(defaults) + if isinstance(module, norm_module_types) and weight_decay_norm is not None: + hyperparams["weight_decay"] = weight_decay_norm + hyperparams.update(overrides.get(module_param_name, {})) + params.append({"params": [value], **hyperparams}) + return params + + +def build_lr_scheduler( + cfg: CfgNode, optimizer: torch.optim.Optimizer +) -> torch.optim.lr_scheduler._LRScheduler: + """ + Build a LR scheduler from config. + """ + name = cfg.SOLVER.LR_SCHEDULER_NAME + + if name == "WarmupMultiStepLR": + steps = [x for x in cfg.SOLVER.STEPS if x <= cfg.SOLVER.MAX_ITER] + if len(steps) != len(cfg.SOLVER.STEPS): + logger = logging.getLogger(__name__) + logger.warning( + "SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. " + "These values will be ignored." + ) + sched = MultiStepParamScheduler( + values=[cfg.SOLVER.GAMMA ** k for k in range(len(steps) + 1)], + milestones=steps, + num_updates=cfg.SOLVER.MAX_ITER, + ) + elif name == "WarmupCosineLR": + sched = CosineParamScheduler(1, 0) + else: + raise ValueError("Unknown LR scheduler: {}".format(name)) + + sched = WarmupParamScheduler( + sched, + cfg.SOLVER.WARMUP_FACTOR, + cfg.SOLVER.WARMUP_ITERS / cfg.SOLVER.MAX_ITER, + cfg.SOLVER.WARMUP_METHOD, + ) + return LRMultiplier(optimizer, multiplier=sched, max_iter=cfg.SOLVER.MAX_ITER) diff --git a/src/sts/detectron2/solver/lr_scheduler.py b/src/sts/detectron2/solver/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..8803e87b9e60cffdbe048c97c282d353191ae4c8 --- /dev/null +++ b/src/sts/detectron2/solver/lr_scheduler.py @@ -0,0 +1,238 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import math +from bisect import bisect_right +from typing import List +import torch +from fvcore.common.param_scheduler import ( + CompositeParamScheduler, + ConstantParamScheduler, + LinearParamScheduler, + ParamScheduler, +) + +logger = logging.getLogger(__name__) + + +class WarmupParamScheduler(CompositeParamScheduler): + """ + Add an initial warmup stage to another scheduler. + """ + + def __init__( + self, + scheduler: ParamScheduler, + warmup_factor: float, + warmup_length: float, + warmup_method: str = "linear", + ): + """ + Args: + scheduler: warmup will be added at the beginning of this scheduler + warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001 + warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire + training, e.g. 
0.01 + warmup_method: one of "linear" or "constant" + """ + end_value = scheduler(warmup_length) # the value to reach when warmup ends + start_value = warmup_factor * scheduler(0.0) + if warmup_method == "constant": + warmup = ConstantParamScheduler(start_value) + elif warmup_method == "linear": + warmup = LinearParamScheduler(start_value, end_value) + else: + raise ValueError("Unknown warmup method: {}".format(warmup_method)) + super().__init__( + [warmup, scheduler], + interval_scaling=["rescaled", "fixed"], + lengths=[warmup_length, 1 - warmup_length], + ) + + +class LRMultiplier(torch.optim.lr_scheduler._LRScheduler): + """ + A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the + learning rate of each param in the optimizer. + Every step, the learning rate of each parameter becomes its initial value + multiplied by the output of the given :class:`ParamScheduler`. + + The absolute learning rate value of each parameter can be different. + This scheduler can be used as long as the relative scale among them do + not change during training. + + Examples: + :: + LRMultiplier( + opt, + WarmupParamScheduler( + MultiStepParamScheduler( + [1, 0.1, 0.01], + milestones=[60000, 80000], + num_updates=90000, + ), 0.001, 100 / 90000 + ), + max_iter=90000 + ) + """ + + # NOTES: in the most general case, every LR can use its own scheduler. + # Supporting this requires interaction with the optimizer when its parameter + # group is initialized. For example, classyvision implements its own optimizer + # that allows different schedulers for every parameter group. + # To avoid this complexity, we use this class to support the most common cases + # where the relative scale among all LRs stay unchanged during training. In this + # case we only need a total of one scheduler that defines the relative LR multiplier. + + def __init__( + self, + optimizer: torch.optim.Optimizer, + multiplier: ParamScheduler, + max_iter: int, + last_iter: int = -1, + ): + """ + Args: + optimizer, last_iter: See ``torch.optim.lr_scheduler._LRScheduler``. + ``last_iter`` is the same as ``last_epoch``. + multiplier: a fvcore ParamScheduler that defines the multiplier on + every LR of the optimizer + max_iter: the total number of training iterations + """ + if not isinstance(multiplier, ParamScheduler): + raise ValueError( + "_LRMultiplier(multiplier=) must be an instance of fvcore " + f"ParamScheduler. Got {multiplier} instead." + ) + self._multiplier = multiplier + self._max_iter = max_iter + super().__init__(optimizer, last_epoch=last_iter) + + def state_dict(self): + # fvcore schedulers are stateless. Only keep pytorch scheduler states + return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch} + + def get_lr(self) -> List[float]: + multiplier = self._multiplier(self.last_epoch / self._max_iter) + return [base_lr * multiplier for base_lr in self.base_lrs] + + +""" +Content below is no longer needed! +""" + +# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes +# only on epoch boundaries. We typically use iteration based schedules instead. +# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean +# "iteration" instead. + +# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating +# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 
+ + +class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + milestones: List[int], + gamma: float = 0.1, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + logger.warning( + "WarmupMultiStepLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" + ) + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of" " increasing integers. Got {}", milestones + ) + self.milestones = milestones + self.gamma = gamma + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + return [ + base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + max_iters: int, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + logger.warning( + "WarmupCosineLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" + ) + self.max_iters = max_iters + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + # Different definitions of half-cosine with warmup are possible. For + # simplicity we multiply the standard half-cosine schedule by the warmup + # factor. An alternative is to start the period of the cosine at warmup_iters + # instead of at 0. In the case that warmup_iters << max_iters the two are + # very close to each other. + return [ + base_lr + * warmup_factor + * 0.5 + * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +def _get_warmup_factor_at_iter( + method: str, iter: int, warmup_iters: int, warmup_factor: float +) -> float: + """ + Return the learning rate warmup factor at a specific iteration. + See :paper:`ImageNet in 1h` for more details. + + Args: + method (str): warmup method; either "constant" or "linear". + iter (int): iteration at which to calculate the warmup factor. + warmup_iters (int): the number of warmup iterations. + warmup_factor (float): the base warmup factor (the meaning changes according + to the method used). + + Returns: + float: the effective warmup factor at the given iteration. 
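+
+ Example (illustrative): with method="linear", warmup_factor=0.001 and warmup_iters=1000,
+ iter=500 gives 0.001 * (1 - 0.5) + 0.5 = 0.5005, and any iter >= 1000 gives 1.0.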
+ """ + if iter >= warmup_iters: + return 1.0 + + if method == "constant": + return warmup_factor + elif method == "linear": + alpha = iter / warmup_iters + return warmup_factor * (1 - alpha) + alpha + else: + raise ValueError("Unknown warmup method: {}".format(method)) diff --git a/src/sts/detectron2/structures/__init__.py b/src/sts/detectron2/structures/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..404af8815072f9d66da32c523718dd8b023b5a60 --- /dev/null +++ b/src/sts/detectron2/structures/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa +from .image_list import ImageList + +from .instances import Instances +from .keypoints import Keypoints, heatmaps_to_keypoints +from .masks import BitMasks, PolygonMasks, polygons_to_bitmask +from .rotated_boxes import RotatedBoxes +from .rotated_boxes import pairwise_iou as pairwise_iou_rotated + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +from detectron2.utils.env import fixup_module_metadata + +fixup_module_metadata(__name__, globals(), __all__) +del fixup_module_metadata diff --git a/src/sts/detectron2/structures/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/structures/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cab003b8cc203acb66a24ddff7305e32bb1a44c4 Binary files /dev/null and b/src/sts/detectron2/structures/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/structures/__pycache__/boxes.cpython-38.pyc b/src/sts/detectron2/structures/__pycache__/boxes.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d59e5c347bdf3e49ff156549115070f347b5a94 Binary files /dev/null and b/src/sts/detectron2/structures/__pycache__/boxes.cpython-38.pyc differ diff --git a/src/sts/detectron2/structures/__pycache__/image_list.cpython-38.pyc b/src/sts/detectron2/structures/__pycache__/image_list.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc46488a7310cfbe20a07db9ae35e87a247280ef Binary files /dev/null and b/src/sts/detectron2/structures/__pycache__/image_list.cpython-38.pyc differ diff --git a/src/sts/detectron2/structures/__pycache__/instances.cpython-38.pyc b/src/sts/detectron2/structures/__pycache__/instances.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9153c3128fe723d651a4456996dafe8f5c1cd9ee Binary files /dev/null and b/src/sts/detectron2/structures/__pycache__/instances.cpython-38.pyc differ diff --git a/src/sts/detectron2/structures/__pycache__/keypoints.cpython-38.pyc b/src/sts/detectron2/structures/__pycache__/keypoints.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3da210e19ab7f085d99bde05df3cb995d95f5b4 Binary files /dev/null and b/src/sts/detectron2/structures/__pycache__/keypoints.cpython-38.pyc differ diff --git a/src/sts/detectron2/structures/__pycache__/masks.cpython-38.pyc b/src/sts/detectron2/structures/__pycache__/masks.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a512d308362072087cfabc11959d91d296453e85 Binary files /dev/null and b/src/sts/detectron2/structures/__pycache__/masks.cpython-38.pyc differ diff --git a/src/sts/detectron2/structures/__pycache__/rotated_boxes.cpython-38.pyc b/src/sts/detectron2/structures/__pycache__/rotated_boxes.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..7d41d13131e86bb29aca9323700eed808a77f0cf Binary files /dev/null and b/src/sts/detectron2/structures/__pycache__/rotated_boxes.cpython-38.pyc differ diff --git a/src/sts/detectron2/structures/boxes.py b/src/sts/detectron2/structures/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..6d8762d60a873c6b6daa42e9e7fcac41eda32fec --- /dev/null +++ b/src/sts/detectron2/structures/boxes.py @@ -0,0 +1,416 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +import numpy as np +from enum import IntEnum, unique +from typing import List, Tuple, Union +import torch +from torch import device + +from detectron2.utils.env import TORCH_VERSION + +_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] + + +if TORCH_VERSION < (1, 8): + _maybe_jit_unused = torch.jit.unused +else: + + def _maybe_jit_unused(x): + return x + + +@unique +class BoxMode(IntEnum): + """ + Enum of different ways to represent a box. + """ + + XYXY_ABS = 0 + """ + (x0, y0, x1, y1) in absolute floating points coordinates. + The coordinates in range [0, width or height]. + """ + XYWH_ABS = 1 + """ + (x0, y0, w, h) in absolute floating points coordinates. + """ + XYXY_REL = 2 + """ + Not yet supported! + (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. + """ + XYWH_REL = 3 + """ + Not yet supported! + (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. + """ + XYWHA_ABS = 4 + """ + (xc, yc, w, h, a) in absolute floating points coordinates. + (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. + """ + + @staticmethod + def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: + """ + Args: + box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 + from_mode, to_mode (BoxMode) + + Returns: + The converted box of the same type. + """ + if from_mode == to_mode: + return box + + original_type = type(box) + is_numpy = isinstance(box, np.ndarray) + single_box = isinstance(box, (list, tuple)) + if single_box: + assert len(box) == 4 or len(box) == 5, ( + "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," + " where k == 4 or 5" + ) + arr = torch.tensor(box)[None, :] + else: + # avoid modifying the input box + if is_numpy: + arr = torch.from_numpy(np.asarray(box)).clone() + else: + arr = box.clone() + + assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ], "Relative mode not yet supported!" 
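+ # Illustrative worked example for the XYWHA_ABS -> XYXY_ABS branch below: the rotated box
+ # (xc=100, yc=50, w=20, h=10, a=90) has |cos a|=0 and |sin a|=1, so new_w=10, new_h=20 and
+ # the resulting axis-aligned box is (95, 40, 105, 60).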
+ + if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: + assert ( + arr.shape[-1] == 5 + ), "The last dimension of input shape must be 5 for XYWHA format" + original_dtype = arr.dtype + arr = arr.double() + + w = arr[:, 2] + h = arr[:, 3] + a = arr[:, 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + new_w = c * w + s * h + new_h = c * h + s * w + + # convert center to top-left corner + arr[:, 0] -= new_w / 2.0 + arr[:, 1] -= new_h / 2.0 + # bottom-right corner + arr[:, 2] = arr[:, 0] + new_w + arr[:, 3] = arr[:, 1] + new_h + + arr = arr[:, :4].to(dtype=original_dtype) + elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: + original_dtype = arr.dtype + arr = arr.double() + arr[:, 0] += arr[:, 2] / 2.0 + arr[:, 1] += arr[:, 3] / 2.0 + angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) + arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) + else: + if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: + arr[:, 2] += arr[:, 0] + arr[:, 3] += arr[:, 1] + elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: + arr[:, 2] -= arr[:, 0] + arr[:, 3] -= arr[:, 1] + else: + raise NotImplementedError( + "Conversion from BoxMode {} to {} is not supported yet".format( + from_mode, to_mode + ) + ) + + if single_box: + return original_type(arr.flatten().tolist()) + if is_numpy: + return arr.numpy() + else: + return arr + + +class Boxes: + """ + This structure stores a list of boxes as a Nx4 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + + Attributes: + tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((-1, 4)).to(dtype=torch.float32, device=device) + assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size() + + self.tensor = tensor + + def clone(self) -> "Boxes": + """ + Clone the Boxes. + + Returns: + Boxes + """ + return Boxes(self.tensor.clone()) + + @_maybe_jit_unused + def to(self, device: torch.device): + # Boxes are assumed float32 and does not support to(dtype) + return Boxes(self.tensor.to(device=device)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. + + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) + return area + + def clip(self, box_size: Tuple[int, int]) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + Args: + box_size (height, width): The clipping box's size. + """ + assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" 
+ h, w = box_size + x1 = self.tensor[:, 0].clamp(min=0, max=w) + y1 = self.tensor[:, 1].clamp(min=0, max=h) + x2 = self.tensor[:, 2].clamp(min=0, max=w) + y2 = self.tensor[:, 3].clamp(min=0, max=h) + self.tensor = torch.stack((x1, y1, x2, y2), dim=-1) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: + a binary vector which represents whether each box is empty + (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] - box[:, 0] + heights = box[:, 3] - box[:, 1] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item) -> "Boxes": + """ + Args: + item: int, slice, or a BoolTensor + + Returns: + Boxes: Create a new :class:`Boxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned Boxes might share storage with this Boxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Boxes(self.tensor[item].view(1, -1)) + b = self.tensor[item] + assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) + return Boxes(b) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "Boxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box. + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + Returns: + a binary vector, indicating whether each box is inside the reference box. + """ + height, width = box_size + inds_inside = ( + (self.tensor[..., 0] >= -boundary_threshold) + & (self.tensor[..., 1] >= -boundary_threshold) + & (self.tensor[..., 2] < width + boundary_threshold) + & (self.tensor[..., 3] < height + boundary_threshold) + ) + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). + """ + return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2 + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the box with horizontal and vertical scaling factors + """ + self.tensor[:, 0::2] *= scale_x + self.tensor[:, 1::2] *= scale_y + + @classmethod + @_maybe_jit_unused + def cat(cls, boxes_list: List["Boxes"]) -> "Boxes": + """ + Concatenates a list of Boxes into a single Boxes + + Arguments: + boxes_list (list[Boxes]) + + Returns: + Boxes: the concatenated Boxes + """ + assert isinstance(boxes_list, (list, tuple)) + if len(boxes_list) == 0: + return cls(torch.empty(0)) + assert all([isinstance(box, Boxes) for box in boxes_list]) + + # use torch.cat (v.s. 
layers.cat) so the returned boxes never share storage with input + cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) + return cat_boxes + + @property + def device(self) -> device: + return self.tensor.device + + # type "Iterator[torch.Tensor]", yield, and iter() not supported by torchscript + # https://github.com/pytorch/pytorch/issues/18627 + @torch.jit.unused + def __iter__(self): + """ + Yield a box as a Tensor of shape (4,) at a time. + """ + yield from self.tensor + + +def pairwise_intersection(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, + compute the intersection area between __all__ N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax) + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: intersection, sized [N,M]. + """ + boxes1, boxes2 = boxes1.tensor, boxes2.tensor + width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( + boxes1[:, None, :2], boxes2[:, :2] + ) # [N,M,2] + + width_height.clamp_(min=0) # [N,M,2] + intersection = width_height.prod(dim=2) # [N,M] + return intersection + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, compute the IoU + (intersection over union) between **all** N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax). + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + area1 = boxes1.area() # [N] + area2 = boxes2.area() # [M] + inter = pairwise_intersection(boxes1, boxes2) + + # handle empty boxes + iou = torch.where( + inter > 0, + inter / (area1[:, None] + area2 - inter), + torch.zeros(1, dtype=inter.dtype, device=inter.device), + ) + return iou + + +def pairwise_ioa(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Similar to :func:`pariwise_iou` but compute the IoA (intersection over boxes2 area). + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: IoA, sized [N,M]. + """ + area2 = boxes2.area() # [M] + inter = pairwise_intersection(boxes1, boxes2) + + # handle empty boxes + ioa = torch.where( + inter > 0, inter / area2, torch.zeros(1, dtype=inter.dtype, device=inter.device) + ) + return ioa + + +def matched_boxlist_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Compute pairwise intersection over union (IOU) of two sets of matched + boxes. The box order must be (xmin, ymin, xmax, ymax). + Similar to boxlist_iou, but computes only diagonal elements of the matrix + + Args: + boxes1: (Boxes) bounding boxes, sized [N,4]. + boxes2: (Boxes) bounding boxes, sized [N,4]. + Returns: + Tensor: iou, sized [N]. 
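+
+ Example (illustrative): boxes1 = Boxes([[0, 0, 10, 10]]) and boxes2 = Boxes([[5, 5, 15, 15]])
+ overlap in a 5x5 region, so the returned iou is 25 / (100 + 100 - 25) ≈ 0.143.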
+ """ + assert len(boxes1) == len( + boxes2 + ), "boxlists should have the same" "number of entries, got {}, {}".format( + len(boxes1), len(boxes2) + ) + area1 = boxes1.area() # [N] + area2 = boxes2.area() # [N] + box1, box2 = boxes1.tensor, boxes2.tensor + lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2] + rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2] + wh = (rb - lt).clamp(min=0) # [N,2] + inter = wh[:, 0] * wh[:, 1] # [N] + iou = inter / (area1 + area2 - inter) # [N] + return iou diff --git a/src/sts/detectron2/structures/image_list.py b/src/sts/detectron2/structures/image_list.py new file mode 100644 index 0000000000000000000000000000000000000000..26e6e49c55e27120ab26b6107cebb6c885f81c38 --- /dev/null +++ b/src/sts/detectron2/structures/image_list.py @@ -0,0 +1,124 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import division +from typing import Any, List, Tuple +import torch +from torch import device +from torch.nn import functional as F + +from detectron2.utils.env import TORCH_VERSION + + +def _as_tensor(x: Tuple[int, int]) -> torch.Tensor: + """ + An equivalent of `torch.as_tensor`, but works under tracing if input + is a list of tensor. `torch.as_tensor` will record a constant in tracing, + but this function will use `torch.stack` instead. + """ + if torch.jit.is_scripting(): + return torch.as_tensor(x) + if isinstance(x, (list, tuple)) and all([isinstance(t, torch.Tensor) for t in x]): + return torch.stack(x) + return torch.as_tensor(x) + + +class ImageList(object): + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size, + and storing in a field the original sizes of each image + + Attributes: + image_sizes (list[tuple[int, int]]): each tuple is (h, w). + During tracing, it becomes list[Tensor] instead. + """ + + def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): + """ + Arguments: + tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 + image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can + be smaller than (H, W) due to padding. + """ + self.tensor = tensor + self.image_sizes = image_sizes + + def __len__(self) -> int: + return len(self.image_sizes) + + def __getitem__(self, idx) -> torch.Tensor: + """ + Access the individual image in its original size. + + Args: + idx: int or slice + + Returns: + Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 + """ + size = self.image_sizes[idx] + return self.tensor[idx, ..., : size[0], : size[1]] + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any) -> "ImageList": + cast_tensor = self.tensor.to(*args, **kwargs) + return ImageList(cast_tensor, self.image_sizes) + + @property + def device(self) -> device: + return self.tensor.device + + @staticmethod + def from_tensors( + tensors: List[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 + ) -> "ImageList": + """ + Args: + tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or + (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded + to the same shape with `pad_value`. + size_divisibility (int): If `size_divisibility > 0`, add padding to ensure + the common height and width is divisible by `size_divisibility`. + This depends on the model and many models need a divisibility of 32. + pad_value (float): value to pad + + Returns: + an `ImageList`. 
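+
+ Example (illustrative): two CHW tensors of shapes (3, 300, 400) and (3, 200, 350) with
+ size_divisibility=32 are padded into a single (2, 3, 320, 416) batch tensor, while
+ image_sizes remains [(300, 400), (200, 350)].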
+ """ + assert len(tensors) > 0 + assert isinstance(tensors, (tuple, list)) + for t in tensors: + assert isinstance(t, torch.Tensor), type(t) + assert t.shape[:-2] == tensors[0].shape[:-2], t.shape + + image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] + image_sizes_tensor = [_as_tensor(x) for x in image_sizes] + max_size = torch.stack(image_sizes_tensor).max(0).values + + if size_divisibility > 1: + stride = size_divisibility + # the last two dims are H,W, both subject to divisibility requirement + max_size = (max_size + (stride - 1)) // stride * stride + + # handle weirdness of scripting and tracing ... + if torch.jit.is_scripting(): + max_size: List[int] = max_size.to(dtype=torch.long).tolist() + else: + # https://github.com/pytorch/pytorch/issues/42448 + if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing(): + image_sizes = image_sizes_tensor + + if len(tensors) == 1: + # This seems slightly (2%) faster. + # TODO: check whether it's faster for multiple images as well + image_size = image_sizes[0] + padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] + batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) + else: + # max_size can be a tensor in tracing mode, therefore convert to list + batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) + batched_imgs = tensors[0].new_full(batch_shape, pad_value) + for img, pad_img in zip(tensors, batched_imgs): + pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) + + return ImageList(batched_imgs.contiguous(), image_sizes) diff --git a/src/sts/detectron2/structures/instances.py b/src/sts/detectron2/structures/instances.py new file mode 100644 index 0000000000000000000000000000000000000000..e6bc832796b1a71dfa3ce6c06735ad02acb7a482 --- /dev/null +++ b/src/sts/detectron2/structures/instances.py @@ -0,0 +1,191 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import itertools +from typing import Any, Dict, List, Tuple, Union +import torch + + +class Instances: + """ + This class represents a list of instances in an image. + It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". + All fields must have the same ``__len__`` which is the number of instances. + + All other (non-field) attributes of this class are considered private: + they must start with '_' and are not modifiable by a user. + + Some basic usage: + + 1. Set/get/check a field: + + .. code-block:: python + + instances.gt_boxes = Boxes(...) + print(instances.pred_masks) # a tensor of shape (N, H, W) + print('gt_masks' in instances) + + 2. ``len(instances)`` returns the number of instances + 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields + and returns a new :class:`Instances`. + Typically, ``indices`` is a integer vector of indices, + or a binary mask of length ``num_instances`` + + .. code-block:: python + + category_3_detections = instances[instances.pred_classes == 3] + confident_detections = instances[instances.scores > 0.9] + """ + + def __init__(self, image_size: Tuple[int, int], **kwargs: Any): + """ + Args: + image_size (height, width): the spatial size of the image. + kwargs: fields to add to this `Instances`. 
+ """ + self._image_size = image_size + self._fields: Dict[str, Any] = {} + for k, v in kwargs.items(): + self.set(k, v) + + @property + def image_size(self) -> Tuple[int, int]: + """ + Returns: + tuple: height, width + """ + return self._image_size + + def __setattr__(self, name: str, val: Any) -> None: + if name.startswith("_"): + super().__setattr__(name, val) + else: + self.set(name, val) + + def __getattr__(self, name: str) -> Any: + if name == "_fields" or name not in self._fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self._fields[name] + + def set(self, name: str, value: Any) -> None: + """ + Set the field named `name` to `value`. + The length of `value` must be the number of instances, + and must agree with other existing fields in this object. + """ + data_len = len(value) + if len(self._fields): + assert ( + len(self) == data_len + ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) + self._fields[name] = value + + def has(self, name: str) -> bool: + """ + Returns: + bool: whether the field called `name` exists. + """ + return name in self._fields + + def remove(self, name: str) -> None: + """ + Remove the field called `name`. + """ + del self._fields[name] + + def get(self, name: str) -> Any: + """ + Returns the field called `name`. + """ + return self._fields[name] + + def get_fields(self) -> Dict[str, Any]: + """ + Returns: + dict: a dict which maps names (str) to data of the fields + + Modifying the returned dict will modify this instance. + """ + return self._fields + + # Tensor-like methods + def to(self, *args: Any, **kwargs: Any) -> "Instances": + """ + Returns: + Instances: all fields are called with a `to(device)`, if the field has this method. + """ + ret = Instances(self._image_size) + for k, v in self._fields.items(): + if hasattr(v, "to"): + v = v.to(*args, **kwargs) + ret.set(k, v) + return ret + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": + """ + Args: + item: an index-like object and will be used to index all the fields. + + Returns: + If `item` is a string, return the data in the corresponding field. + Otherwise, returns an `Instances` where all fields are indexed by `item`. 
+ """ + if type(item) == int: + if item >= len(self) or item < -len(self): + raise IndexError("Instances index out of range!") + else: + item = slice(item, None, len(self)) + + ret = Instances(self._image_size) + for k, v in self._fields.items(): + ret.set(k, v[item]) + return ret + + def __len__(self) -> int: + for v in self._fields.values(): + # use __len__ because len() has to be int and is not friendly to tracing + return v.__len__() + raise NotImplementedError("Empty Instances does not support __len__!") + + def __iter__(self): + raise NotImplementedError("`Instances` object is not iterable!") + + @staticmethod + def cat(instance_lists: List["Instances"]) -> "Instances": + """ + Args: + instance_lists (list[Instances]) + + Returns: + Instances + """ + assert all(isinstance(i, Instances) for i in instance_lists) + assert len(instance_lists) > 0 + if len(instance_lists) == 1: + return instance_lists[0] + + image_size = instance_lists[0].image_size + for i in instance_lists[1:]: + assert i.image_size == image_size + ret = Instances(image_size) + for k in instance_lists[0]._fields.keys(): + values = [i.get(k) for i in instance_lists] + v0 = values[0] + if isinstance(v0, torch.Tensor): + values = torch.cat(values, dim=0) + elif isinstance(v0, list): + values = list(itertools.chain(*values)) + elif hasattr(type(v0), "cat"): + values = type(v0).cat(values) + else: + raise ValueError("Unsupported type {} for concatenation".format(type(v0))) + ret.set(k, values) + return ret + + def __str__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={}, ".format(len(self)) + s += "image_height={}, ".format(self._image_size[0]) + s += "image_width={}, ".format(self._image_size[1]) + s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) + return s + + __repr__ = __str__ diff --git a/src/sts/detectron2/structures/keypoints.py b/src/sts/detectron2/structures/keypoints.py new file mode 100644 index 0000000000000000000000000000000000000000..3d956a2d57e30be18ccef1fd3cf201d5ba3d8ab4 --- /dev/null +++ b/src/sts/detectron2/structures/keypoints.py @@ -0,0 +1,230 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +from typing import Any, List, Tuple, Union +import torch +from torch.nn import functional as F + +from detectron2.utils.env import TORCH_VERSION + +if TORCH_VERSION < (1, 8): + + def script_if_tracing(fn): + return fn + + +else: + script_if_tracing = torch.jit.script_if_tracing + + +class Keypoints: + """ + Stores keypoint **annotation** data. GT Instances have a `gt_keypoints` property + containing the x,y location and visibility flag of each keypoint. This tensor has shape + (N, K, 3) where N is the number of instances and K is the number of keypoints per instance. + + The visibility flag follows the COCO format and must be one of three integers: + + * v=0: not labeled (in which case x=y=0) + * v=1: labeled but not visible + * v=2: labeled and visible + """ + + def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]): + """ + Arguments: + keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint. + The shape should be (N, K, 3) where N is the number of + instances, and K is the number of keypoints per instance. 
+ """ + device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu") + keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device) + assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape + self.tensor = keypoints + + def __len__(self) -> int: + return self.tensor.size(0) + + def to(self, *args: Any, **kwargs: Any) -> "Keypoints": + return type(self)(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor: + """ + Convert keypoint annotations to a heatmap of one-hot labels for training, + as described in :paper:`Mask R-CNN`. + + Arguments: + boxes: Nx4 tensor, the boxes to draw the keypoints to + + Returns: + heatmaps: + A tensor of shape (N, K), each element is integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. + valid: + A tensor of shape (N, K) containing whether each keypoint is in the roi or not. + """ + return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size) + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints": + """ + Create a new `Keypoints` by indexing on this `Keypoints`. + + The following usage are allowed: + + 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance. + 2. `new_kpts = kpts[2:10]`: return a slice of key points. + 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor + with `length = len(kpts)`. Nonzero elements in the vector will be selected. + + Note that the returned Keypoints might share storage with this Keypoints, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Keypoints([self.tensor[item]]) + return Keypoints(self.tensor[item]) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + +# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop) +def _keypoints_to_heatmap( + keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space. + + Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the + closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the + continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"): + d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. + + Arguments: + keypoints: tensor of keypoint locations in of shape (N, K, 3). + rois: Nx4 tensor of rois in xyxy format + heatmap_size: integer side length of square heatmap. + + Returns: + heatmaps: A tensor of shape (N, K) containing an integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. + valid: A tensor of shape (N, K) containing whether each keypoint is in + the roi or not. 
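+
+ Example (illustrative): with heatmap_size=56, a visible keypoint whose scaled, floored
+ position inside its roi is (x=10, y=20) receives spatial label 20 * 56 + 10 = 1130 and valid=1.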
+ """ + + if rois.numel() == 0: + return rois.new().long(), rois.new().long() + offset_x = rois[:, 0] + offset_y = rois[:, 1] + scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) + scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) + + offset_x = offset_x[:, None] + offset_y = offset_y[:, None] + scale_x = scale_x[:, None] + scale_y = scale_y[:, None] + + x = keypoints[..., 0] + y = keypoints[..., 1] + + x_boundary_inds = x == rois[:, 2][:, None] + y_boundary_inds = y == rois[:, 3][:, None] + + x = (x - offset_x) * scale_x + x = x.floor().long() + y = (y - offset_y) * scale_y + y = y.floor().long() + + x[x_boundary_inds] = heatmap_size - 1 + y[y_boundary_inds] = heatmap_size - 1 + + valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) + vis = keypoints[..., 2] > 0 + valid = (valid_loc & vis).long() + + lin_ind = y * heatmap_size + x + heatmaps = lin_ind * valid + + return heatmaps, valid + + +@script_if_tracing +def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: + """ + Extract predicted keypoint locations from heatmaps. + + Args: + maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for + each ROI and each keypoint. + rois (Tensor): (#ROIs, 4). The box of each ROI. + + Returns: + Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to + (x, y, logit, score) for each keypoint. + + When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, + we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from + Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. + """ + # The decorator use of torch.no_grad() was not supported by torchscript. + # https://github.com/pytorch/pytorch/issues/44768 + maps = maps.detach() + rois = rois.detach() + + offset_x = rois[:, 0] + offset_y = rois[:, 1] + + widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) + heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) + widths_ceil = widths.ceil() + heights_ceil = heights.ceil() + + num_rois, num_keypoints = maps.shape[:2] + xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) + + width_corrections = widths / widths_ceil + height_corrections = heights / heights_ceil + + keypoints_idx = torch.arange(num_keypoints, device=maps.device) + + for i in range(num_rois): + outsize = (int(heights_ceil[i]), int(widths_ceil[i])) + roi_map = F.interpolate( + maps[[i]], size=outsize, mode="bicubic", align_corners=False + ).squeeze( + 0 + ) # #keypoints x H x W + + # softmax over the spatial region + max_score, _ = roi_map.view(num_keypoints, -1).max(1) + max_score = max_score.view(num_keypoints, 1, 1) + tmp_full_resolution = (roi_map - max_score).exp_() + tmp_pool_resolution = (maps[i] - max_score).exp_() + # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, + # so that the scores of objects of different absolute sizes will be more comparable + roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True) + + w = roi_map.shape[2] + pos = roi_map.view(num_keypoints, -1).argmax(1) + + x_int = pos % w + y_int = (pos - x_int) // w + + assert ( + roi_map_scores[keypoints_idx, y_int, x_int] + == roi_map_scores.view(num_keypoints, -1).max(1)[0] + ).all() + + x = (x_int.float() + 0.5) * width_corrections[i] + y = (y_int.float() + 0.5) * height_corrections[i] + + xy_preds[i, :, 0] = x + offset_x[i] + xy_preds[i, :, 1] = y + offset_y[i] + xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, 
x_int] + xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] + + return xy_preds diff --git a/src/sts/detectron2/structures/masks.py b/src/sts/detectron2/structures/masks.py new file mode 100644 index 0000000000000000000000000000000000000000..b5d2abdbff9e8200680f1ebb90c1af0bf533c323 --- /dev/null +++ b/src/sts/detectron2/structures/masks.py @@ -0,0 +1,441 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import itertools +import numpy as np +from typing import Any, Iterator, List, Union +import pycocotools.mask as mask_util +import torch + +from detectron2.layers.roi_align import ROIAlign + +from .boxes import Boxes + + +def polygon_area(x, y): + # Using the shoelace formula + # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) + + +def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: + """ + Args: + polygons (list[ndarray]): each array has shape (Nx2,) + height, width (int) + + Returns: + ndarray: a bool mask of shape (height, width) + """ + assert len(polygons) > 0, "COCOAPI does not support empty polygons" + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + return mask_util.decode(rle).astype(np.bool) + + +def rasterize_polygons_within_box( + polygons: List[np.ndarray], box: np.ndarray, mask_size: int +) -> torch.Tensor: + """ + Rasterize the polygons into a mask image and + crop the mask content in the given box. + The cropped mask is resized to (mask_size, mask_size). + + This function is used when generating training targets for mask head in Mask R-CNN. + Given original ground-truth masks for an image, new ground-truth mask + training targets in the size of `mask_size x mask_size` + must be provided for each predicted box. This function will be called to + produce such targets. + + Args: + polygons (list[ndarray[float]]): a list of polygons, which represents an instance. + box: 4-element numpy array + mask_size (int): + + Returns: + Tensor: BoolTensor of shape (mask_size, mask_size) + """ + # 1. Shift the polygons w.r.t the boxes + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(polygons) + for p in polygons: + p[0::2] = p[0::2] - box[0] + p[1::2] = p[1::2] - box[1] + + # 2. Rescale the polygons to the new box size + # max() to avoid division by small number + ratio_h = mask_size / max(h, 0.1) + ratio_w = mask_size / max(w, 0.1) + + if ratio_h == ratio_w: + for p in polygons: + p *= ratio_h + else: + for p in polygons: + p[0::2] *= ratio_w + p[1::2] *= ratio_h + + # 3. Rasterize the polygons with coco api + mask = polygons_to_bitmask(polygons, mask_size, mask_size) + mask = torch.from_numpy(mask) + return mask + + +class BitMasks: + """ + This class stores the segmentation masks for all objects in one image, in + the form of bitmaps. + + Attributes: + tensor: bool Tensor of N,H,W, representing N instances in the image. + """ + + def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): + """ + Args: + tensor: bool Tensor of N,H,W, representing N instances in the image. 
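+
+ Example (illustrative): BitMasks(torch.zeros(5, 480, 640, dtype=torch.bool)) stores five
+ empty masks for a 480x640 image; len() is 5 and image_size is (480, 640).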
+ """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) + assert tensor.dim() == 3, tensor.size() + self.image_size = tensor.shape[1:] + self.tensor = tensor + + def to(self, *args: Any, **kwargs: Any) -> "BitMasks": + return BitMasks(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": + """ + Returns: + BitMasks: Create a new :class:`BitMasks` by indexing. + + The following usage are allowed: + + 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. + 2. `new_masks = masks[2:10]`: return a slice of masks. + 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor + with `length = len(masks)`. Nonzero elements in the vector will be selected. + + Note that the returned object might share storage with this object, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return BitMasks(self.tensor[item].view(1, -1)) + m = self.tensor[item] + assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( + item, m.shape + ) + return BitMasks(m) + + def __iter__(self) -> torch.Tensor: + yield from self.tensor + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + def __len__(self) -> int: + return self.tensor.shape[0] + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: a BoolTensor which represents + whether each mask is empty (False) or non-empty (True). + """ + return self.tensor.flatten(1).any(dim=1) + + @staticmethod + def from_polygon_masks( + polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int + ) -> "BitMasks": + """ + Args: + polygon_masks (list[list[ndarray]] or PolygonMasks) + height, width (int) + """ + if isinstance(polygon_masks, PolygonMasks): + polygon_masks = polygon_masks.polygons + masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] + return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each bitmask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + It has less reconstruction error compared to rasterization with polygons. + However we observe no difference in accuracy, + but BitMasks requires more memory to store all the masks. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: + A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. + """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + device = self.tensor.device + + batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] + rois = torch.cat([batch_inds, boxes], dim=1) # Nx5 + + bit_masks = self.tensor.to(dtype=torch.float32) + rois = rois.to(device=device) + output = ( + ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) + .forward(bit_masks[:, None, :, :], rois) + .squeeze(1) + ) + output = output >= 0.5 + return output + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around bitmasks. 
+ If a mask is empty, it's bounding box will be all zero. + """ + boxes = torch.zeros(self.tensor.shape[0], 4, dtype=torch.float32) + x_any = torch.any(self.tensor, dim=1) + y_any = torch.any(self.tensor, dim=2) + for idx in range(self.tensor.shape[0]): + x = torch.where(x_any[idx, :])[0] + y = torch.where(y_any[idx, :])[0] + if len(x) > 0 and len(y) > 0: + boxes[idx, :] = torch.as_tensor( + [x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=torch.float32 + ) + return Boxes(boxes) + + @staticmethod + def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": + """ + Concatenates a list of BitMasks into a single BitMasks + + Arguments: + bitmasks_list (list[BitMasks]) + + Returns: + BitMasks: the concatenated BitMasks + """ + assert isinstance(bitmasks_list, (list, tuple)) + assert len(bitmasks_list) > 0 + assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) + + cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) + return cat_bitmasks + + +class PolygonMasks: + """ + This class stores the segmentation masks for all objects in one image, in the form of polygons. + + Attributes: + polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. + """ + + def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): + """ + Arguments: + polygons (list[list[np.ndarray]]): The first + level of the list correspond to individual instances, + the second level to all the polygons that compose the + instance, and the third level to the polygon coordinates. + The third level array should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + """ + if not isinstance(polygons, list): + raise ValueError( + "Cannot create PolygonMasks: Expect a list of list of polygons per image. " + "Got '{}' instead.".format(type(polygons)) + ) + + def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: + # Use float64 for higher precision, because why not? + # Always put polygons on CPU (self.to is a no-op) since they + # are supposed to be small tensors. + # May need to change this assumption if GPU placement becomes useful + if isinstance(t, torch.Tensor): + t = t.cpu().numpy() + return np.asarray(t).astype("float64") + + def process_polygons( + polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] + ) -> List[np.ndarray]: + if not isinstance(polygons_per_instance, list): + raise ValueError( + "Cannot create polygons: Expect a list of polygons per instance. " + "Got '{}' instead.".format(type(polygons_per_instance)) + ) + # transform each polygon to a numpy array + polygons_per_instance = [_make_array(p) for p in polygons_per_instance] + for polygon in polygons_per_instance: + if len(polygon) % 2 != 0 or len(polygon) < 6: + raise ValueError(f"Cannot create a polygon from {len(polygon)} coordinates.") + return polygons_per_instance + + self.polygons: List[List[np.ndarray]] = [ + process_polygons(polygons_per_instance) for polygons_per_instance in polygons + ] + + def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": + return self + + @property + def device(self) -> torch.device: + return torch.device("cpu") + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around polygon masks. 
+ """ + boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) + for idx, polygons_per_instance in enumerate(self.polygons): + minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) + maxxy = torch.zeros(2, dtype=torch.float32) + for polygon in polygons_per_instance: + coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) + minxy = torch.min(minxy, torch.min(coords, dim=0).values) + maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) + boxes[idx, :2] = minxy + boxes[idx, 2:] = maxxy + return Boxes(boxes) + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: + a BoolTensor which represents whether each mask is empty (False) or not (True). + """ + keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] + return torch.from_numpy(np.asarray(keep, dtype=np.bool)) + + def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": + """ + Support indexing over the instances and return a `PolygonMasks` object. + `item` can be: + + 1. An integer. It will return an object with only one instance. + 2. A slice. It will return an object with the selected instances. + 3. A list[int]. It will return an object with the selected instances, + correpsonding to the indices in the list. + 4. A vector mask of type BoolTensor, whose length is num_instances. + It will return an object with the instances whose mask is nonzero. + """ + if isinstance(item, int): + selected_polygons = [self.polygons[item]] + elif isinstance(item, slice): + selected_polygons = self.polygons[item] + elif isinstance(item, list): + selected_polygons = [self.polygons[i] for i in item] + elif isinstance(item, torch.Tensor): + # Polygons is a list, so we have to move the indices back to CPU. + if item.dtype == torch.bool: + assert item.dim() == 1, item.shape + item = torch.nonzero(item, as_tuple=False).squeeze(1).cpu().numpy().tolist() + elif item.dtype in [torch.int32, torch.int64]: + item = item.cpu().numpy().tolist() + else: + raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) + selected_polygons = [self.polygons[i] for i in item] + return PolygonMasks(selected_polygons) + + def __iter__(self) -> Iterator[List[np.ndarray]]: + """ + Yields: + list[ndarray]: the polygons for one instance. + Each Tensor is a float64 vector representing a polygon. + """ + return iter(self.polygons) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.polygons)) + return s + + def __len__(self) -> int: + return len(self.polygons) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each mask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. 
+ """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + + device = boxes.device + # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise + # (several small tensors for representing a single instance mask) + boxes = boxes.to(torch.device("cpu")) + + results = [ + rasterize_polygons_within_box(poly, box.numpy(), mask_size) + for poly, box in zip(self.polygons, boxes) + ] + """ + poly: list[list[float]], the polygons for one instance + box: a tensor of shape (4,) + """ + if len(results) == 0: + return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) + return torch.stack(results, dim=0).to(device=device) + + def area(self): + """ + Computes area of the mask. + Only works with Polygons, using the shoelace formula: + https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + + Returns: + Tensor: a vector, area for each instance + """ + + area = [] + for polygons_per_instance in self.polygons: + area_per_instance = 0 + for p in polygons_per_instance: + area_per_instance += polygon_area(p[0::2], p[1::2]) + area.append(area_per_instance) + + return torch.tensor(area) + + @staticmethod + def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": + """ + Concatenates a list of PolygonMasks into a single PolygonMasks + + Arguments: + polymasks_list (list[PolygonMasks]) + + Returns: + PolygonMasks: the concatenated PolygonMasks + """ + assert isinstance(polymasks_list, (list, tuple)) + assert len(polymasks_list) > 0 + assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) + + cat_polymasks = type(polymasks_list[0])( + list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) + ) + return cat_polymasks diff --git a/src/sts/detectron2/structures/rotated_boxes.py b/src/sts/detectron2/structures/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..8f48b40560f2f409b20d87bb1ff448bf44e090d2 --- /dev/null +++ b/src/sts/detectron2/structures/rotated_boxes.py @@ -0,0 +1,505 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import math +from typing import List, Tuple +import torch + +from detectron2.layers.rotated_boxes import pairwise_iou_rotated + +from .boxes import Boxes, _maybe_jit_unused + + +class RotatedBoxes(Boxes): + """ + This structure stores a list of rotated boxes as a Nx5 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx5 matrix. Each row is + (x_center, y_center, width, height, angle), + in which angle is represented in degrees. + While there's no strict range restriction for it, + the recommended principal range is between [-180, 180) degrees. + + Assume we have a horizontal box B = (x_center, y_center, width, height), + where width is along the x-axis and height is along the y-axis. + The rotated box B_rot (x_center, y_center, width, height, angle) + can be seen as: + + 1. When angle == 0: + B_rot == B + 2. When angle > 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW; + 3. When angle < 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW. 
+ + Mathematically, since the right-handed coordinate system for image space + is (y, x), where y is top->down and x is left->right, the 4 vertices of the + rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from + the vertices of the horizontal rectangle :math:`(y_i, x_i)` (i = 1, 2, 3, 4) + in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians, + :math:`(y_c, x_c)` is the center of the rectangle): + + .. math:: + + yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c, + + xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c, + + which is the standard rigid-body rotation transformation. + + Intuitively, the angle is + (1) the rotation angle from y-axis in image space + to the height vector (top->down in the box's local coordinate system) + of the box in CCW, and + (2) the rotation angle from x-axis in image space + to the width vector (left->right in the box's local coordinate system) + of the box in CCW. + + More intuitively, consider the following horizontal box ABCD represented + in (x1, y1, x2, y2): (3, 2, 7, 4), + covering the [3, 7] x [2, 4] region of the continuous coordinate system + which looks like this: + + .. code:: none + + O--------> x + | + | A---B + | | | + | D---C + | + v y + + Note that each capital letter represents one 0-dimensional geometric point + instead of a 'square pixel' here. + + In the example above, using (x, y) to represent a point we have: + + .. math:: + + O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4) + + We name vector AB = vector DC as the width vector in box's local coordinate system, and + vector AD = vector BC as the height vector in box's local coordinate system. Initially, + when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis + in the image space, respectively. + + For better illustration, we denote the center of the box as E, + + .. code:: none + + O--------> x + | + | A---B + | | E | + | D---C + | + v y + + where the center E = ((3+7)/2, (2+4)/2) = (5, 3). + + Also, + + .. math:: + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Therefore, the corresponding representation for the same shape in rotated box in + (x_center, y_center, width, height, angle) format is: + + (5, 3, 4, 2, 0), + + Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees + CCW (counter-clockwise) by definition. It looks like this: + + .. code:: none + + O--------> x + | B-C + | | | + | |E| + | | | + | A-D + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CCW with regard to E: + A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5) + + Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to + vector AD or vector BC (the top->down height vector in box's local coordinate system), + or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right + width vector in box's local coordinate system). + + .. math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise) + by definition? It looks like this: + + .. code:: none + + O--------> x + | D-A + | | | + | |E| + | | | + | C-B + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CW with regard to E: + A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1) + + .. 
math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU + will be 1. However, these two will generate different RoI Pooling results and + should not be treated as an identical box. + + On the other hand, it's easy to see that (X, Y, W, H, A) is identical to + (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be + identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is + equivalent to rotating the same shape 90 degrees CW. + + We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180): + + .. code:: none + + O--------> x + | + | C---D + | | E | + | B---A + | + v y + + .. math:: + + A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2), + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Finally, this is a very inaccurate (heavily quantized) illustration of + how (5, 3, 4, 2, 60) looks like in case anyone wonders: + + .. code:: none + + O--------> x + | B\ + | / C + | /E / + | A / + | `D + v y + + It's still a rectangle with center of (5, 3), width of 4 and height of 2, + but its angle (and thus orientation) is somewhere between + (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90). + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device) + assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size() + + self.tensor = tensor + + def clone(self) -> "RotatedBoxes": + """ + Clone the RotatedBoxes. + + Returns: + RotatedBoxes + """ + return RotatedBoxes(self.tensor.clone()) + + @_maybe_jit_unused + def to(self, device: torch.device): + # Boxes are assumed float32 and does not support to(dtype) + return RotatedBoxes(self.tensor.to(device=device)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. + + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = box[:, 2] * box[:, 3] + return area + + def normalize_angles(self) -> None: + """ + Restrict angles to the range of [-180, 180) degrees + """ + self.tensor[:, 4] = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0 + + def clip(self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + For RRPN: + Only clip boxes that are almost horizontal with a tolerance of + clip_angle_threshold to maintain backward compatibility. + + Rotated boxes beyond this threshold are not clipped for two reasons: + + 1. There are potentially multiple ways to clip a rotated box to make it + fit within the image. + 2. It's tricky to make the entire rectangular box fit within the image + and still be able to not leave out pixels of interest. + + Therefore we rely on ops like RoIAlignRotated to safely handle this. + + Args: + box_size (height, width): The clipping box's size. + clip_angle_threshold: + Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees), + we do the clipping as horizontal boxes. 
+ """ + h, w = box_size + + # normalize angles to be within (-180, 180] degrees + self.normalize_angles() + + idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0] + + # convert to (x1, y1, x2, y2) + x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0 + y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0 + x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0 + y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0 + + # clip + x1.clamp_(min=0, max=w) + y1.clamp_(min=0, max=h) + x2.clamp_(min=0, max=w) + y2.clamp_(min=0, max=h) + + # convert back to (xc, yc, w, h) + self.tensor[idx, 0] = (x1 + x2) / 2.0 + self.tensor[idx, 1] = (y1 + y2) / 2.0 + # make sure widths and heights do not increase due to numerical errors + self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1) + self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: a binary vector which represents + whether each box is empty (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] + heights = box[:, 3] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item) -> "RotatedBoxes": + """ + Returns: + RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned RotatedBoxes might share storage with this RotatedBoxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return RotatedBoxes(self.tensor[item].view(1, -1)) + b = self.tensor[item] + assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format( + item + ) + return RotatedBoxes(b) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "RotatedBoxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box covering + [0, width] x [0, height] + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + For RRPN, it might not be necessary to call this function since it's common + for rotated box to extend to outside of the image boundaries + (the clip function only clips the near-horizontal boxes) + + Returns: + a binary vector, indicating whether each box is inside the reference box. 
+ """ + height, width = box_size + + cnt_x = self.tensor[..., 0] + cnt_y = self.tensor[..., 1] + half_w = self.tensor[..., 2] / 2.0 + half_h = self.tensor[..., 3] / 2.0 + a = self.tensor[..., 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + max_rect_dx = c * half_w + s * half_h + max_rect_dy = c * half_h + s * half_w + + inds_inside = ( + (cnt_x - max_rect_dx >= -boundary_threshold) + & (cnt_y - max_rect_dy >= -boundary_threshold) + & (cnt_x + max_rect_dx < width + boundary_threshold) + & (cnt_y + max_rect_dy < height + boundary_threshold) + ) + + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). + """ + return self.tensor[:, :2] + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the rotated box with horizontal and vertical scaling factors + Note: when scale_factor_x != scale_factor_y, + the rotated box does not preserve the rectangular shape when the angle + is not a multiple of 90 degrees under resize transformation. + Instead, the shape is a parallelogram (that has skew) + Here we make an approximation by fitting a rotated rectangle to the parallelogram. + """ + self.tensor[:, 0] *= scale_x + self.tensor[:, 1] *= scale_y + theta = self.tensor[:, 4] * math.pi / 180.0 + c = torch.cos(theta) + s = torch.sin(theta) + + # In image space, y is top->down and x is left->right + # Consider the local coordintate system for the rotated box, + # where the box center is located at (0, 0), and the four vertices ABCD are + # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2) + # the midpoint of the left edge AD of the rotated box E is: + # E = (A+D)/2 = (-w / 2, 0) + # the midpoint of the top edge AB of the rotated box F is: + # F(0, -h / 2) + # To get the old coordinates in the global system, apply the rotation transformation + # (Note: the right-handed coordinate system for image space is yOx): + # (old_x, old_y) = (s * y + c * x, c * y - s * x) + # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2) + # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2) + # After applying the scaling factor (sfx, sfy): + # E(new) = (-sfx * c * w / 2, sfy * s * w / 2) + # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2) + # The new width after scaling tranformation becomes: + + # w(new) = |E(new) - O| * 2 + # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2 + # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w + # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y + self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2) + + # h(new) = |F(new) - O| * 2 + # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2 + # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h + # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x + self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2) + + # The angle is the rotation angle from y-axis in image space to the height + # vector (top->down in the box's local coordinate system) of the box in CCW. 
+ # + # angle(new) = angle_yOx(O - F(new)) + # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) ) + # = atan2(sfx * s * h / 2, sfy * c * h / 2) + # = atan2(sfx * s, sfy * c) + # + # For example, + # when sfx == sfy, angle(new) == atan2(s, c) == angle(old) + self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi + + @classmethod + @_maybe_jit_unused + def cat(cls, boxes_list: List["RotatedBoxes"]) -> "RotatedBoxes": + """ + Concatenates a list of RotatedBoxes into a single RotatedBoxes + + Arguments: + boxes_list (list[RotatedBoxes]) + + Returns: + RotatedBoxes: the concatenated RotatedBoxes + """ + assert isinstance(boxes_list, (list, tuple)) + if len(boxes_list) == 0: + return cls(torch.empty(0)) + assert all([isinstance(box, RotatedBoxes) for box in boxes_list]) + + # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input + cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) + return cat_boxes + + @property + def device(self) -> torch.device: + return self.tensor.device + + @torch.jit.unused + def __iter__(self): + """ + Yield a box as a Tensor of shape (5,) at a time. + """ + yield from self.tensor + + +def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> None: + """ + Given two lists of rotated boxes of size N and M, + compute the IoU (intersection over union) + between **all** N x M pairs of boxes. + The box order must be (x_center, y_center, width, height, angle). + + Args: + boxes1, boxes2 (RotatedBoxes): + two `RotatedBoxes`. Contains N & M rotated boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + + return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor) diff --git a/src/sts/detectron2/utils/README.md b/src/sts/detectron2/utils/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9765b24a730b77556104187ac3ef5439ab0859fd --- /dev/null +++ b/src/sts/detectron2/utils/README.md @@ -0,0 +1,5 @@ +# Utility functions + +This folder contain utility functions that are not used in the +core library, but are useful for building models or training +code using the config system. diff --git a/src/sts/detectron2/utils/__init__.py b/src/sts/detectron2/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9020c2df23e2af280b7bb168b996ae9eaf312eb8 --- /dev/null +++ b/src/sts/detectron2/utils/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
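A minimal usage sketch of the `RotatedBoxes` structure defined above (illustrative only; it assumes the vendored `detectron2` package is importable and that its compiled `_C` extension is built, since `pairwise_iou` dispatches to the native `pairwise_iou_rotated` op):

```python
import torch

from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou

# Boxes are (x_center, y_center, width, height, angle in degrees), as documented above.
boxes_a = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 0.0],
                                     [5.0, 3.0, 4.0, 2.0, 90.0]]))
boxes_b = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, -90.0]]))

print(boxes_a.area())            # tensor([8., 8.])  (width * height per box)
boxes_a.clip(box_size=(10, 10))  # clips only near-horizontal boxes, in place

# Pairwise IoU between all N x M box pairs; per the docstring above,
# (5, 3, 4, 2, 90) and (5, 3, 4, 2, -90) cover the same region, so their IoU is 1.
print(pairwise_iou(boxes_a, boxes_b))  # shape (2, 1)
```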
diff --git a/src/sts/detectron2/utils/__pycache__/__init__.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f175663c15f901006720a39746bf92a657de3eff Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/collect_env.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/collect_env.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad7fd1933eaa9ceef8f8d4ca703377b459539455 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/collect_env.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/colormap.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/colormap.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..001710c76ebaba1d447ac561b60c09eb7fb7351a Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/colormap.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/comm.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/comm.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..655db161b2726ab6ca9716eae47c4dcd14e82ab6 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/comm.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/env.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/env.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0672471854a4b0517119a44bb06459b3bf479c96 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/env.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/events.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/events.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03ed03363682c9eebf62aca2543e438635e6aa67 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/events.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/file_io.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/file_io.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..857af6218aa1e89661f99848c360fa567e9afae5 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/file_io.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/logger.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/logger.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..787d78c46997f1000866a385404e0493a4dc3670 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/logger.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/memory.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/memory.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb9980416e73cbf5aeb5e7a74df4dfff33e317ec Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/memory.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/registry.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/registry.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a555bf938bbdf0079fe86195b7e38581c1a6f10 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/registry.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/serialize.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/serialize.cpython-38.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..f4b8b40b05152de534448388b20cd532b1c68682 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/serialize.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/video_visualizer.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/video_visualizer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e5f4830a24e11e948c31ba5d09aadefdcfc482b Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/video_visualizer.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/visualizer.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/visualizer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06bbceb98a8f1666c215e0d9fbf07f0816853ed9 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/visualizer.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/visualizer_chn.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/visualizer_chn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81b11a34079756a20e9c52fd1c65b43fe8cfdf94 Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/visualizer_chn.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/__pycache__/visualizer_vintext.cpython-38.pyc b/src/sts/detectron2/utils/__pycache__/visualizer_vintext.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9e817cdeeb0f27085b613492f0fe9835aee7dfe Binary files /dev/null and b/src/sts/detectron2/utils/__pycache__/visualizer_vintext.cpython-38.pyc differ diff --git a/src/sts/detectron2/utils/analysis.py b/src/sts/detectron2/utils/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..51b453cabb2f369e470296d468955432446de1a5 --- /dev/null +++ b/src/sts/detectron2/utils/analysis.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# -*- coding: utf-8 -*- + +import typing +import fvcore +from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table +from torch import nn + +from detectron2.export import TracingAdapter + +__all__ = [ + "activation_count_operators", + "flop_count_operators", + "parameter_count_table", + "parameter_count", +] + +FLOPS_MODE = "flops" +ACTIVATIONS_MODE = "activations" + + +# Some extra ops to ignore from counting, including elementwise and reduction ops +_IGNORED_OPS = { + "aten::add", + "aten::add_", + "aten::argmax", + "aten::argsort", + "aten::batch_norm", + "aten::constant_pad_nd", + "aten::div", + "aten::div_", + "aten::exp", + "aten::log2", + "aten::max_pool2d", + "aten::meshgrid", + "aten::mul", + "aten::mul_", + "aten::neg", + "aten::nonzero_numpy", + "aten::reciprocal", + "aten::rsub", + "aten::sigmoid", + "aten::sigmoid_", + "aten::softmax", + "aten::sort", + "aten::sqrt", + "aten::sub", + "torchvision::nms", # TODO estimate flop for nms +} + + +class FlopCountAnalysis(fvcore.nn.FlopCountAnalysis): + """ + Same as :class:`fvcore.nn.FlopCountAnalysis`, but supports detectron2 models. + """ + + def __init__(self, model, inputs): + """ + Args: + model (nn.Module): + inputs (Any): inputs of the given model. Does not have to be tuple of tensors. 
+ """ + wrapper = TracingAdapter(model, inputs, allow_non_tensor=True) + super().__init__(wrapper, wrapper.flattened_inputs) + self.set_op_handle(**{k: None for k in _IGNORED_OPS}) + + +def flop_count_operators(model: nn.Module, inputs: list) -> typing.DefaultDict[str, float]: + """ + Implement operator-level flops counting using jit. + This is a wrapper of :func:`fvcore.nn.flop_count` and adds supports for standard + detection models in detectron2. + Please use :class:`FlopCountAnalysis` for more advanced functionalities. + + Note: + The function runs the input through the model to compute flops. + The flops of a detection model is often input-dependent, for example, + the flops of box & mask head depends on the number of proposals & + the number of detected objects. + Therefore, the flops counting using a single input may not accurately + reflect the computation cost of a model. It's recommended to average + across a number of inputs. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. + Only "image" key will be used. + supported_ops (dict[str, Handle]): see documentation of :func:`fvcore.nn.flop_count` + + Returns: + Counter: Gflop count per operator + """ + old_train = model.training + model.eval() + ret = FlopCountAnalysis(model, inputs).by_operator() + model.train(old_train) + return {k: v / 1e9 for k, v in ret.items()} + + +def activation_count_operators( + model: nn.Module, inputs: list, **kwargs +) -> typing.DefaultDict[str, float]: + """ + Implement operator-level activations counting using jit. + This is a wrapper of fvcore.nn.activation_count, that supports standard detection models + in detectron2. + + Note: + The function runs the input through the model to compute activations. + The activations of a detection model is often input-dependent, for example, + the activations of box & mask head depends on the number of proposals & + the number of detected objects. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. + Only "image" key will be used. 
+ + Returns: + Counter: activation count per operator + """ + return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) + + +def _wrapper_count_operators( + model: nn.Module, inputs: list, mode: str, **kwargs +) -> typing.DefaultDict[str, float]: + # ignore some ops + supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} + supported_ops.update(kwargs.pop("supported_ops", {})) + kwargs["supported_ops"] = supported_ops + + assert len(inputs) == 1, "Please use batch size=1" + tensor_input = inputs[0]["image"] + inputs = [{"image": tensor_input}] # remove other keys, in case there are any + + old_train = model.training + if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): + model = model.module + wrapper = TracingAdapter(model, inputs) + wrapper.eval() + if mode == FLOPS_MODE: + ret = flop_count(wrapper, (tensor_input,), **kwargs) + elif mode == ACTIVATIONS_MODE: + ret = activation_count(wrapper, (tensor_input,), **kwargs) + else: + raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) + # compatible with change in fvcore + if isinstance(ret, tuple): + ret = ret[0] + model.train(old_train) + return ret diff --git a/src/sts/detectron2/utils/collect_env.py b/src/sts/detectron2/utils/collect_env.py new file mode 100644 index 0000000000000000000000000000000000000000..fc079504f6309ce1e4a276e35c5526d8cd14eb3f --- /dev/null +++ b/src/sts/detectron2/utils/collect_env.py @@ -0,0 +1,209 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import importlib +import numpy as np +import os +import re +import subprocess +import sys +from collections import defaultdict +import PIL +import torch +import torchvision +from tabulate import tabulate + +__all__ = ["collect_env_info"] + + +def collect_torch_env(): + try: + import torch.__config__ + + return torch.__config__.show() + except ImportError: + # compatible with older versions of pytorch + from torch.utils.collect_env import get_pretty_env_info + + return get_pretty_env_info() + + +def get_env_module(): + var_name = "DETECTRON2_ENV_MODULE" + return var_name, os.environ.get(var_name, "") + + +def detect_compute_compatibility(CUDA_HOME, so_file): + try: + cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") + if os.path.isfile(cuobjdump): + output = subprocess.check_output( + "'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True + ) + output = output.decode("utf-8").strip().split("\n") + arch = [] + for line in output: + line = re.findall(r"\.sm_([0-9]*)\.", line)[0] + arch.append(".".join(line)) + arch = sorted(set(arch)) + return ", ".join(arch) + else: + return so_file + "; cannot find cuobjdump" + except Exception: + # unhandled failure + return so_file + + +def collect_env_info(): + has_gpu = torch.cuda.is_available() # true for both CUDA & ROCM + torch_version = torch.__version__ + + # NOTE that CUDA_HOME/ROCM_HOME could be None even when CUDA runtime libs are functional + from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME + + has_rocm = False + if (getattr(torch.version, "hip", None) is not None) and (ROCM_HOME is not None): + has_rocm = True + has_cuda = has_gpu and (not has_rocm) + + data = [] + data.append(("sys.platform", sys.platform)) # check-template.yml depends on it + data.append(("Python", sys.version.replace("\n", ""))) + data.append(("numpy", np.__version__)) + + try: + import detectron2 # noqa + + data.append( + ("detectron2", detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__)) + ) + 
except ImportError: + data.append(("detectron2", "failed to import")) + + try: + import detectron2._C as _C + except ImportError as e: + data.append(("detectron2._C", f"not built correctly: {e}")) + + # print system compilers when extension fails to build + if sys.platform != "win32": # don't know what to do for windows + try: + # this is how torch/utils/cpp_extensions.py choose compiler + cxx = os.environ.get("CXX", "c++") + cxx = subprocess.check_output("'{}' --version".format(cxx), shell=True) + cxx = cxx.decode("utf-8").strip().split("\n")[0] + except subprocess.SubprocessError: + cxx = "Not found" + data.append(("Compiler ($CXX)", cxx)) + + if has_cuda and CUDA_HOME is not None: + try: + nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") + nvcc = subprocess.check_output("'{}' -V".format(nvcc), shell=True) + nvcc = nvcc.decode("utf-8").strip().split("\n")[-1] + except subprocess.SubprocessError: + nvcc = "Not found" + data.append(("CUDA compiler", nvcc)) + if has_cuda and sys.platform != "win32": + try: + so_file = importlib.util.find_spec("detectron2._C").origin + except ImportError: + pass + else: + data.append( + ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, so_file)) + ) + else: + # print compilers that are used to build extension + data.append(("Compiler", _C.get_compiler_version())) + data.append(("CUDA compiler", _C.get_cuda_version())) # cuda or hip + if has_cuda and getattr(_C, "has_cuda", lambda: True)(): + data.append( + ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, _C.__file__)) + ) + + data.append(get_env_module()) + data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__))) + data.append(("PyTorch debug build", torch.version.debug)) + + data.append(("GPU available", has_gpu)) + if has_gpu: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k))) + name = torch.cuda.get_device_name(k) + f" (arch={cap})" + devices[name].append(str(k)) + for name, devids in devices.items(): + data.append(("GPU " + ",".join(devids), name)) + + if has_rocm: + msg = " - invalid!" if not (ROCM_HOME and os.path.isdir(ROCM_HOME)) else "" + data.append(("ROCM_HOME", str(ROCM_HOME) + msg)) + else: + msg = " - invalid!" 
if not (CUDA_HOME and os.path.isdir(CUDA_HOME)) else "" + data.append(("CUDA_HOME", str(CUDA_HOME) + msg)) + + cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) + if cuda_arch_list: + data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) + data.append(("Pillow", PIL.__version__)) + + try: + data.append( + ( + "torchvision", + str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), + ) + ) + if has_cuda: + try: + torchvision_C = importlib.util.find_spec("torchvision._C").origin + msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) + data.append(("torchvision arch flags", msg)) + except ImportError: + data.append(("torchvision._C", "Not found")) + except AttributeError: + data.append(("torchvision", "unknown")) + + try: + import fvcore + + data.append(("fvcore", fvcore.__version__)) + except ImportError: + pass + + try: + import iopath + + data.append(("iopath", iopath.__version__)) + except (ImportError, AttributeError): + pass + + try: + import cv2 + + data.append(("cv2", cv2.__version__)) + except ImportError: + data.append(("cv2", "Not found")) + env_str = tabulate(data) + "\n" + env_str += collect_torch_env() + return env_str + + +if __name__ == "__main__": + try: + from detectron2.utils.collect_env import collect_env_info as f + + print(f()) + except ImportError: + print(collect_env_info()) + + if torch.cuda.is_available(): + for k in range(torch.cuda.device_count()): + device = f"cuda:{k}" + try: + x = torch.tensor([1, 2.0], dtype=torch.float32) + x = x.to(device) + except Exception as e: + print( + f"Unable to copy tensor to device={device}: {e}. " + "Your CUDA environment is broken." + ) diff --git a/src/sts/detectron2/utils/colormap.py b/src/sts/detectron2/utils/colormap.py new file mode 100644 index 0000000000000000000000000000000000000000..150ccc372262ec4de0b36db66a303cae9495e67f --- /dev/null +++ b/src/sts/detectron2/utils/colormap.py @@ -0,0 +1,140 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +""" +An awesome colormap for really neat visualizations. +Copied from Detectron, and removed gray colors. 
+""" + +import numpy as np + +__all__ = ["colormap", "random_color"] + +# fmt: off +# RGB: +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] +).astype(np.float32).reshape(-1, 3) +# fmt: on + + +def colormap(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] + """ + assert maximum in [255, 1], maximum + c = _COLORS * maximum + if not rgb: + c = c[:, ::-1] + return c + + +def random_color(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a vector of 3 numbers + """ + idx = np.random.randint(0, len(_COLORS)) + ret = _COLORS[idx] * maximum + if not rgb: + ret = ret[::-1] + return ret + + +if __name__ == "__main__": + import cv2 + + size = 100 + H, W = 10, 10 + canvas = np.random.rand(H * size, W * size, 3).astype("float32") + for h in range(H): + for w in range(W): + idx = h * W + w + if idx >= len(_COLORS): + break + canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] + cv2.imshow("a", canvas) + cv2.waitKey(0) diff --git a/src/sts/detectron2/utils/comm.py b/src/sts/detectron2/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..9b199a176c4fcecab155674c52fa7dce2740315c --- /dev/null +++ b/src/sts/detectron2/utils/comm.py @@ -0,0 +1,263 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. 
+""" + +import functools +import logging +import numpy as np +import pickle +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". +""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). 
+ + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + + Returns: + a dict with the same keys as input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict diff --git a/src/sts/detectron2/utils/env.py b/src/sts/detectron2/utils/env.py new file mode 100644 index 0000000000000000000000000000000000000000..40634c17c73273ac8927632be164f466cfe7d1fa --- /dev/null +++ b/src/sts/detectron2/utils/env.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import importlib +import importlib.util +import logging +import numpy as np +import os +import random +import sys +from datetime import datetime +import torch + +__all__ = ["seed_all_rng"] + + +TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) +""" +PyTorch version as a tuple of 2 ints. Useful for comparison. +""" + + +DOC_BUILDING = os.getenv("_DOC_BUILDING", False) # set in docs/conf.py +""" +Whether we're building documentation. +""" + + +def seed_all_rng(seed=None): + """ + Set the random seed for the RNG in torch, numpy and python. + + Args: + seed (int): if None, will use a strong random seed. + """ + if seed is None: + seed = ( + os.getpid() + + int(datetime.now().strftime("%S%f")) + + int.from_bytes(os.urandom(2), "big") + ) + logger = logging.getLogger(__name__) + logger.info("Using a generated random seed {}".format(seed)) + np.random.seed(seed) + torch.manual_seed(seed) + random.seed(seed) + os.environ["PYTHONHASHSEED"] = str(seed) + + +# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path +def _import_file(module_name, file_path, make_importable=False): + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if make_importable: + sys.modules[module_name] = module + return module + + +def _configure_libraries(): + """ + Configurations for some libraries. + """ + # An environment option to disable `import cv2` globally, + # in case it leads to negative performance impact + disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) + if disable_cv2: + sys.modules["cv2"] = None + else: + # Disable opencl in opencv since its interaction with cuda often has negative effects + # This envvar is supported after OpenCV 3.4.0 + os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" + try: + import cv2 + + if int(cv2.__version__.split(".")[0]) >= 3: + cv2.ocl.setUseOpenCL(False) + except ModuleNotFoundError: + # Other types of ImportError, if happened, should not be ignored. + # Because a failed opencv import could mess up address space + # https://github.com/skvark/opencv-python/issues/381 + pass + + def get_version(module, digit=2): + return tuple(map(int, module.__version__.split(".")[:digit])) + + # fmt: off + assert get_version(torch) >= (1, 4), "Requires torch>=1.4" + import fvcore + assert get_version(fvcore, 3) >= (0, 1, 2), "Requires fvcore>=0.1.2" + import yaml + assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" + # fmt: on + + +_ENV_SETUP_DONE = False + + +def setup_environment(): + """Perform environment setup work. 
The default setup is a no-op, but this + function allows the user to specify a Python source file or a module in + the $DETECTRON2_ENV_MODULE environment variable, that performs + custom setup work that may be necessary to their computing environment. + """ + global _ENV_SETUP_DONE + if _ENV_SETUP_DONE: + return + _ENV_SETUP_DONE = True + + _configure_libraries() + + custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") + + if custom_module_path: + setup_custom_environment(custom_module_path) + else: + # The default setup is a no-op + pass + + +def setup_custom_environment(custom_module): + """ + Load custom environment setup by importing a Python source file or a + module, and run the setup function. + """ + if custom_module.endswith(".py"): + module = _import_file("detectron2.utils.env.custom_module", custom_module) + else: + module = importlib.import_module(custom_module) + assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( + "Custom environment module defined in {} does not have the " + "required callable attribute 'setup_environment'." + ).format(custom_module) + module.setup_environment() + + +def fixup_module_metadata(module_name, namespace, keys=None): + """ + Fix the __qualname__ of module members to be their exported api name, so + when they are referenced in docs, sphinx can find them. Reference: + https://github.com/python-trio/trio/blob/6754c74eacfad9cc5c92d5c24727a2f3b620624e/trio/_util.py#L216-L241 + """ + if not DOC_BUILDING: + return + seen_ids = set() + + def fix_one(qualname, name, obj): + # avoid infinite recursion (relevant when using + # typing.Generic, for example) + if id(obj) in seen_ids: + return + seen_ids.add(id(obj)) + + mod = getattr(obj, "__module__", None) + if mod is not None and (mod.startswith(module_name) or mod.startswith("fvcore.")): + obj.__module__ = module_name + # Modules, unlike everything else in Python, put fully-qualitied + # names into their __name__ attribute. We check for "." to avoid + # rewriting these. + if hasattr(obj, "__name__") and "." not in obj.__name__: + obj.__name__ = name + obj.__qualname__ = qualname + if isinstance(obj, type): + for attr_name, attr_value in obj.__dict__.items(): + fix_one(objname + "." + attr_name, attr_name, attr_value) + + if keys is None: + keys = namespace.keys() + for objname in keys: + if not objname.startswith("_"): + obj = namespace[objname] + fix_one(objname, objname, obj) diff --git a/src/sts/detectron2/utils/events.py b/src/sts/detectron2/utils/events.py new file mode 100644 index 0000000000000000000000000000000000000000..5dee954bdd6ad7dc5ea999562d1d2b03c3a520d9 --- /dev/null +++ b/src/sts/detectron2/utils/events.py @@ -0,0 +1,486 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import datetime +import json +import logging +import os +import time +from collections import defaultdict +from contextlib import contextmanager +from typing import Optional +import torch +from fvcore.common.history_buffer import HistoryBuffer + +from detectron2.utils.file_io import PathManager + +__all__ = [ + "get_event_storage", + "JSONWriter", + "TensorboardXWriter", + "CommonMetricPrinter", + "EventStorage", +] + +_CURRENT_STORAGE_STACK = [] + + +def get_event_storage(): + """ + Returns: + The :class:`EventStorage` object that's currently being used. + Throws an error if no :class:`EventStorage` is currently enabled. + """ + assert len( + _CURRENT_STORAGE_STACK + ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" 
+ return _CURRENT_STORAGE_STACK[-1] + + +class EventWriter: + """ + Base class for writers that obtain events from :class:`EventStorage` and process them. + """ + + def write(self): + raise NotImplementedError + + def close(self): + pass + + +class JSONWriter(EventWriter): + """ + Write scalars to a json file. + + It saves scalars as one json per line (instead of a big json) for easy parsing. + + Examples parsing such a json file: + :: + $ cat metrics.json | jq -s '.[0:2]' + [ + { + "data_time": 0.008433341979980469, + "iteration": 19, + "loss": 1.9228371381759644, + "loss_box_reg": 0.050025828182697296, + "loss_classifier": 0.5316952466964722, + "loss_mask": 0.7236229181289673, + "loss_rpn_box": 0.0856662318110466, + "loss_rpn_cls": 0.48198649287223816, + "lr": 0.007173333333333333, + "time": 0.25401854515075684 + }, + { + "data_time": 0.007216215133666992, + "iteration": 39, + "loss": 1.282649278640747, + "loss_box_reg": 0.06222952902317047, + "loss_classifier": 0.30682939291000366, + "loss_mask": 0.6970193982124329, + "loss_rpn_box": 0.038663312792778015, + "loss_rpn_cls": 0.1471673548221588, + "lr": 0.007706666666666667, + "time": 0.2490077018737793 + } + ] + + $ cat metrics.json | jq '.loss_mask' + 0.7126231789588928 + 0.689423680305481 + 0.6776131987571716 + ... + + """ + + def __init__(self, json_file, window_size=20): + """ + Args: + json_file (str): path to the json file. New data will be appended if the file exists. + window_size (int): the window size of median smoothing for the scalars whose + `smoothing_hint` are True. + """ + self._file_handle = PathManager.open(json_file, "a") + self._window_size = window_size + self._last_write = -1 + + def write(self): + storage = get_event_storage() + to_save = defaultdict(dict) + + for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): + # keep scalars that have not been written + if iter <= self._last_write: + continue + to_save[iter][k] = v + if len(to_save): + all_iters = sorted(to_save.keys()) + self._last_write = max(all_iters) + + for itr, scalars_per_iter in to_save.items(): + scalars_per_iter["iteration"] = itr + self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") + self._file_handle.flush() + try: + os.fsync(self._file_handle.fileno()) + except AttributeError: + pass + + def close(self): + self._file_handle.close() + + +class TensorboardXWriter(EventWriter): + """ + Write all scalars to a tensorboard file. + """ + + def __init__(self, log_dir: str, window_size: int = 20, **kwargs): + """ + Args: + log_dir (str): the directory to save the output events + window_size (int): the scalars will be median-smoothed by this window size + + kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` + """ + self._window_size = window_size + from torch.utils.tensorboard import SummaryWriter + + self._writer = SummaryWriter(log_dir, **kwargs) + self._last_write = -1 + + def write(self): + storage = get_event_storage() + new_last_write = self._last_write + for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): + if iter > self._last_write: + self._writer.add_scalar(k, v, iter) + new_last_write = max(new_last_write, iter) + self._last_write = new_last_write + + # storage.put_{image,histogram} is only meant to be used by + # tensorboard writer. So we access its internal fields directly from here. 
+ if len(storage._vis_data) >= 1: + for img_name, img, step_num in storage._vis_data: + self._writer.add_image(img_name, img, step_num) + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. + storage.clear_images() + + if len(storage._histograms) >= 1: + for params in storage._histograms: + self._writer.add_histogram_raw(**params) + storage.clear_histograms() + + def close(self): + if hasattr(self, "_writer"): # doesn't exist when the code fails at import + self._writer.close() + + +class CommonMetricPrinter(EventWriter): + """ + Print **common** metrics to the terminal, including + iteration time, ETA, memory, all losses, and the learning rate. + It also applies smoothing using a window of 20 elements. + + It's meant to print common metrics in common ways. + To print something in more customized ways, please implement a similar printer by yourself. + """ + + def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): + """ + Args: + max_iter: the maximum number of iterations to train. + Used to compute ETA. If not given, ETA will not be printed. + window_size (int): the losses will be median-smoothed by this window size + """ + self.logger = logging.getLogger(__name__) + self._max_iter = max_iter + self._window_size = window_size + self._last_write = None # (step, time) of last call to write(). Used to compute ETA + + def _get_eta(self, storage) -> Optional[str]: + if self._max_iter is None: + return "" + iteration = storage.iter + try: + eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration - 1) + storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) + return str(datetime.timedelta(seconds=int(eta_seconds))) + except KeyError: + # estimate eta on our own - more noisy + eta_string = None + if self._last_write is not None: + estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( + iteration - self._last_write[0] + ) + eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + self._last_write = (iteration, time.perf_counter()) + return eta_string + + def write(self): + storage = get_event_storage() + iteration = storage.iter + if iteration == self._max_iter: + # This hook only reports training progress (loss, ETA, etc) but not other data, + # therefore do not write anything after training succeeds, even if this method + # is called. 
+ return + + try: + data_time = storage.history("data_time").avg(20) + except KeyError: + # they may not exist in the first few iterations (due to warmup) + # or when SimpleTrainer is not used + data_time = None + try: + iter_time = storage.history("time").global_avg() + except KeyError: + iter_time = None + try: + lr = "{:.5g}".format(storage.history("lr").latest()) + except KeyError: + lr = "N/A" + + eta_string = self._get_eta(storage) + + if torch.cuda.is_available(): + max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + else: + max_mem_mb = None + + # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" + self.logger.info( + " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( + eta=f"eta: {eta_string} " if eta_string else "", + iter=iteration, + losses=" ".join( + [ + "{}: {:.4g}".format(k, v.median(self._window_size)) + for k, v in storage.histories().items() + if "loss" in k + ] + ), + time="time: {:.4f} ".format(iter_time) if iter_time is not None else "", + data_time="data_time: {:.4f} ".format(data_time) if data_time is not None else "", + lr=lr, + memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", + ) + ) + + +class EventStorage: + """ + The user-facing class that provides metric storage functionalities. + + In the future we may add support for storing / logging other types of data if needed. + """ + + def __init__(self, start_iter=0): + """ + Args: + start_iter (int): the iteration number to start with + """ + self._history = defaultdict(HistoryBuffer) + self._smoothing_hints = {} + self._latest_scalars = {} + self._iter = start_iter + self._current_prefix = "" + self._vis_data = [] + self._histograms = [] + + def put_image(self, img_name, img_tensor): + """ + Add an `img_tensor` associated with `img_name`, to be shown on + tensorboard. + + Args: + img_name (str): The name of the image to put into tensorboard. + img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` + Tensor of shape `[channel, height, width]` where `channel` is + 3. The image format should be RGB. The elements in img_tensor + can either have values in [0, 1] (float32) or [0, 255] (uint8). + The `img_tensor` will be visualized in tensorboard. + """ + self._vis_data.append((img_name, img_tensor, self._iter)) + + def put_scalar(self, name, value, smoothing_hint=True): + """ + Add a scalar `value` to the `HistoryBuffer` associated with `name`. + + Args: + smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be + smoothed when logged. The hint will be accessible through + :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint + and apply custom smoothing rule. + + It defaults to True because most scalars we save need to be smoothed to + provide any useful signal. + """ + name = self._current_prefix + name + history = self._history[name] + value = float(value) + history.update(value, self._iter) + self._latest_scalars[name] = (value, self._iter) + + existing_hint = self._smoothing_hints.get(name) + if existing_hint is not None: + assert ( + existing_hint == smoothing_hint + ), "Scalar {} was put with a different smoothing_hint!".format(name) + else: + self._smoothing_hints[name] = smoothing_hint + + def put_scalars(self, *, smoothing_hint=True, **kwargs): + """ + Put multiple scalars from keyword arguments. 
+ + Examples: + + storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) + """ + for k, v in kwargs.items(): + self.put_scalar(k, v, smoothing_hint=smoothing_hint) + + def put_histogram(self, hist_name, hist_tensor, bins=1000): + """ + Create a histogram from a tensor. + + Args: + hist_name (str): The name of the histogram to put into tensorboard. + hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted + into a histogram. + bins (int): Number of histogram bins. + """ + ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() + + # Create a histogram with PyTorch + hist_counts = torch.histc(hist_tensor, bins=bins) + hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) + + # Parameter for the add_histogram_raw function of SummaryWriter + hist_params = dict( + tag=hist_name, + min=ht_min, + max=ht_max, + num=len(hist_tensor), + sum=float(hist_tensor.sum()), + sum_squares=float(torch.sum(hist_tensor ** 2)), + bucket_limits=hist_edges[1:].tolist(), + bucket_counts=hist_counts.tolist(), + global_step=self._iter, + ) + self._histograms.append(hist_params) + + def history(self, name): + """ + Returns: + HistoryBuffer: the scalar history for name + """ + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + return ret + + def histories(self): + """ + Returns: + dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars + """ + return self._history + + def latest(self): + """ + Returns: + dict[str -> (float, int)]: mapping from the name of each scalar to the most + recent value and the iteration number its added. + """ + return self._latest_scalars + + def latest_with_smoothing_hint(self, window_size=20): + """ + Similar to :meth:`latest`, but the returned values + are either the un-smoothed original latest value, + or a median of the given window_size, + depend on whether the smoothing_hint is True. + + This provides a default behavior that other writers can use. + """ + result = {} + for k, (v, itr) in self._latest_scalars.items(): + result[k] = ( + self._history[k].median(window_size) if self._smoothing_hints[k] else v, + itr, + ) + return result + + def smoothing_hints(self): + """ + Returns: + dict[name -> bool]: the user-provided hint on whether the scalar + is noisy and needs smoothing. + """ + return self._smoothing_hints + + def step(self): + """ + User should either: (1) Call this function to increment storage.iter when needed. Or + (2) Set `storage.iter` to the correct iteration number before each iteration. + + The storage will then be able to associate the new data with an iteration number. + """ + self._iter += 1 + + @property + def iter(self): + """ + Returns: + int: The current iteration number. When used together with a trainer, + this is ensured to be the same as trainer.iter. + """ + return self._iter + + @iter.setter + def iter(self, val): + self._iter = int(val) + + @property + def iteration(self): + # for backward compatibility + return self._iter + + def __enter__(self): + _CURRENT_STORAGE_STACK.append(self) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + assert _CURRENT_STORAGE_STACK[-1] == self + _CURRENT_STORAGE_STACK.pop() + + @contextmanager + def name_scope(self, name): + """ + Yields: + A context within which all the events added to this storage + will be prefixed by the name scope. 
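A hedged sketch of how `EventStorage` and a writer fit together outside a full trainer; the metric values and the `/tmp/metrics.json` output path are made up for illustration:

```python
from detectron2.utils.events import EventStorage, JSONWriter

writer = JSONWriter("/tmp/metrics.json")          # arbitrary output path
with EventStorage(start_iter=0) as storage:
    for _ in range(3):
        storage.put_scalars(loss=0.5, lr=0.01)    # placeholder metric values
        storage.step()
        writer.write()                            # reads scalars via get_event_storage()
writer.close()
print(storage.latest())                           # name -> (latest value, iteration)
```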
+ """ + old_prefix = self._current_prefix + self._current_prefix = name.rstrip("/") + "/" + yield + self._current_prefix = old_prefix + + def clear_images(self): + """ + Delete all the stored images for visualization. This should be called + after images are written to tensorboard. + """ + self._vis_data = [] + + def clear_histograms(self): + """ + Delete all the stored histograms for visualization. + This should be called after histograms are written to tensorboard. + """ + self._histograms = [] diff --git a/src/sts/detectron2/utils/file_io.py b/src/sts/detectron2/utils/file_io.py new file mode 100644 index 0000000000000000000000000000000000000000..46ee4ec31d04eee77976ff3edbbf84762a3409ed --- /dev/null +++ b/src/sts/detectron2/utils/file_io.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from iopath.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler +from iopath.common.file_io import PathManager as PathManagerBase + +__all__ = ["PathManager", "PathHandler"] + + +PathManager = PathManagerBase() +""" +This is a detectron2 project-specific PathManager. +We try to stay away from global PathManager in fvcore as it +introduces potential conflicts among other libraries. +""" + + +class Detectron2Handler(PathHandler): + """ + Resolve anything that's hosted under detectron2's namespace. + """ + + PREFIX = "detectron2://" + S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path, **kwargs): + name = path[len(self.PREFIX) :] + return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name, **kwargs) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +PathManager.register_handler(HTTPURLHandler()) +PathManager.register_handler(OneDrivePathHandler()) +PathManager.register_handler(Detectron2Handler()) diff --git a/src/sts/detectron2/utils/logger.py b/src/sts/detectron2/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..7c7890f8bec5db44098fe1a38d26eb13231f7063 --- /dev/null +++ b/src/sts/detectron2/utils/logger.py @@ -0,0 +1,237 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import atexit +import functools +import logging +import os +import sys +import time +from collections import Counter +import torch +from tabulate import tabulate +from termcolor import colored + +from detectron2.utils.file_io import PathManager + +__all__ = ["setup_logger", "log_first_n", "log_every_n", "log_every_n_seconds"] + + +class _ColorfulFormatter(logging.Formatter): + def __init__(self, *args, **kwargs): + self._root_name = kwargs.pop("root_name") + "." + self._abbrev_name = kwargs.pop("abbrev_name", "") + if len(self._abbrev_name): + self._abbrev_name = self._abbrev_name + "." 
+ super(_ColorfulFormatter, self).__init__(*args, **kwargs) + + def formatMessage(self, record): + record.name = record.name.replace(self._root_name, self._abbrev_name) + log = super(_ColorfulFormatter, self).formatMessage(record) + if record.levelno == logging.WARNING: + prefix = colored("WARNING", "red", attrs=["blink"]) + elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: + prefix = colored("ERROR", "red", attrs=["blink", "underline"]) + else: + return log + return prefix + " " + log + + +@functools.lru_cache() # so that calling setup_logger multiple times won't add many handlers +def setup_logger( + output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None +): + """ + Initialize the detectron2 logger and set its verbosity level to "DEBUG". + + Args: + output (str): a file name or a directory to save log. If None, will not save log file. + If ends with ".txt" or ".log", assumed to be a file name. + Otherwise, logs will be saved to `output/log.txt`. + name (str): the root module name of this logger + abbrev_name (str): an abbreviation of the module, to avoid long names in logs. + Set to "" to not log the root module in logs. + By default, will abbreviate "detectron2" to "d2" and leave other + modules unchanged. + + Returns: + logging.Logger: a logger + """ + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate = False + + if abbrev_name is None: + abbrev_name = "d2" if name == "detectron2" else name + + plain_formatter = logging.Formatter( + "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S" + ) + # stdout logging: master only + if distributed_rank == 0: + ch = logging.StreamHandler(stream=sys.stdout) + ch.setLevel(logging.DEBUG) + if color: + formatter = _ColorfulFormatter( + colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", + datefmt="%m/%d %H:%M:%S", + root_name=name, + abbrev_name=str(abbrev_name), + ) + else: + formatter = plain_formatter + ch.setFormatter(formatter) + logger.addHandler(ch) + + # file logging: all workers + if output is not None: + if output.endswith(".txt") or output.endswith(".log"): + filename = output + else: + filename = os.path.join(output, "log.txt") + if distributed_rank > 0: + filename = filename + ".rank{}".format(distributed_rank) + PathManager.mkdirs(os.path.dirname(filename)) + + fh = logging.StreamHandler(_cached_log_stream(filename)) + fh.setLevel(logging.DEBUG) + fh.setFormatter(plain_formatter) + logger.addHandler(fh) + + return logger + + +# cache the opened file object, so that different calls to `setup_logger` +# with the same file name can safely write to the same file. +@functools.lru_cache(maxsize=None) +def _cached_log_stream(filename): + # use 1K buffer if writing to cloud storage + io = PathManager.open(filename, "a", buffering=1024 if "://" in filename else -1) + atexit.register(io.close) + return io + + +""" +Below are some other convenient logging methods. 
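A short usage sketch for `setup_logger`; the output directory is an arbitrary placeholder:

```python
from detectron2.utils.logger import setup_logger

logger = setup_logger(output="/tmp/demo_logs", name="detectron2")  # arbitrary directory
logger.info("rank 0 logs to stdout and to /tmp/demo_logs/log.txt")
```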
+They are mainly adopted from +https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py +""" + + +def _find_caller(): + """ + Returns: + str: module name of the caller + tuple: a hashable key to be used to identify different callers + """ + frame = sys._getframe(2) + while frame: + code = frame.f_code + if os.path.join("utils", "logger.") not in code.co_filename: + mod_name = frame.f_globals["__name__"] + if mod_name == "__main__": + mod_name = "detectron2" + return mod_name, (code.co_filename, frame.f_lineno, code.co_name) + frame = frame.f_back + + +_LOG_COUNTER = Counter() +_LOG_TIMER = {} + + +def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): + """ + Log only for the first n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + key (str or tuple[str]): the string(s) can be one of "caller" or + "message", which defines how to identify duplicated logs. + For example, if called with `n=1, key="caller"`, this function + will only log the first call from the same caller, regardless of + the message content. + If called with `n=1, key="message"`, this function will log the + same content only once, even if they are called from different places. + If called with `n=1, key=("caller", "message")`, this function + will not log only if the same caller has logged the same message before. + """ + if isinstance(key, str): + key = (key,) + assert len(key) > 0 + + caller_module, caller_key = _find_caller() + hash_key = () + if "caller" in key: + hash_key = hash_key + caller_key + if "message" in key: + hash_key = hash_key + (msg,) + + _LOG_COUNTER[hash_key] += 1 + if _LOG_COUNTER[hash_key] <= n: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n(lvl, msg, n=1, *, name=None): + """ + Log once per n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + _LOG_COUNTER[key] += 1 + if n == 1 or _LOG_COUNTER[key] % n == 1: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n_seconds(lvl, msg, n=1, *, name=None): + """ + Log no more than once per n seconds. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + last_logged = _LOG_TIMER.get(key, None) + current_time = time.time() + if last_logged is None or current_time - last_logged >= n: + logging.getLogger(name or caller_module).log(lvl, msg) + _LOG_TIMER[key] = current_time + + +def create_small_table(small_dict): + """ + Create a small table using the keys of small_dict as headers. This is only + suitable for small dictionaries. + + Args: + small_dict (dict): a result dictionary of only a few items. + + Returns: + str: the table as a string. + """ + keys, values = tuple(zip(*small_dict.items())) + table = tabulate( + [values], + headers=keys, + tablefmt="pipe", + floatfmt=".3f", + stralign="center", + numalign="center", + ) + return table + + +def _log_api_usage(identifier: str): + """ + Internal function used to log the usage of different detectron2 components + inside facebook's infra. + """ + torch._C._log_api_usage_once("detectron2." 
+ identifier) diff --git a/src/sts/detectron2/utils/memory.py b/src/sts/detectron2/utils/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..bd494780b9dbbd1571688cd270bb9b53d113c13e --- /dev/null +++ b/src/sts/detectron2/utils/memory.py @@ -0,0 +1,84 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import logging +from contextlib import contextmanager +from functools import wraps +import torch + +__all__ = ["retry_if_cuda_oom"] + + +@contextmanager +def _ignore_torch_cuda_oom(): + """ + A context which ignores CUDA OOM exception from pytorch. + """ + try: + yield + except RuntimeError as e: + # NOTE: the string may change? + if "CUDA out of memory. " in str(e): + pass + else: + raise + + +def retry_if_cuda_oom(func): + """ + Makes a function retry itself after encountering + pytorch's CUDA OOM error. + It will first retry after calling `torch.cuda.empty_cache()`. + + If that still fails, it will then retry by trying to convert inputs to CPUs. + In this case, it expects the function to dispatch to CPU implementation. + The return values may become CPU tensors as well and it's user's + responsibility to convert it back to CUDA tensor if needed. + + Args: + func: a stateless callable that takes tensor-like objects as arguments + + Returns: + a callable which retries `func` if OOM is encountered. + + Examples: + :: + output = retry_if_cuda_oom(some_torch_function)(input1, input2) + # output may be on CPU even if inputs are on GPU + + Note: + 1. When converting inputs to CPU, it will only look at each argument and check + if it has `.device` and `.to` for conversion. Nested structures of tensors + are not supported. + + 2. Since the function might be called more than once, it has to be + stateless. + """ + + def maybe_to_cpu(x): + try: + like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") + except AttributeError: + like_gpu_tensor = False + if like_gpu_tensor: + return x.to(device="cpu") + else: + return x + + @wraps(func) + def wrapped(*args, **kwargs): + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Clear cache and retry + torch.cuda.empty_cache() + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Try on CPU. This slows down the code significantly, therefore print a notice. + logger = logging.getLogger(__name__) + logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) + new_args = (maybe_to_cpu(x) for x in args) + new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} + return func(*new_args, **new_kwargs) + + return wrapped diff --git a/src/sts/detectron2/utils/registry.py b/src/sts/detectron2/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..ea0434d9740ba781072d53967d25d2c69b91b62e --- /dev/null +++ b/src/sts/detectron2/utils/registry.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +from typing import Any +import pydoc +from fvcore.common.registry import Registry # for backward compatibility. + +""" +``Registry`` and `locate` provide ways to map a string (typically found +in config files) to callable objects. +""" + +__all__ = ["Registry", "locate"] + + +def _convert_target_to_string(t: Any) -> Any: + """ + Inverse of ``locate()``. + """ + return f"{t.__module__}.{t.__qualname__}" + + +def locate(name: str) -> Any: + """ + Locate and return an object ``x`` using an input string ``{x.__module__}.{x.__qualname__}``, + such as "module.submodule.class_name". + + Raise Exception if it cannot be found. 
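A minimal sketch of `retry_if_cuda_oom` from `memory.py`; the tensors and the wrapped function are placeholders, not part of the repo:

```python
import torch
from detectron2.utils.memory import retry_if_cuda_oom

def pairwise_distance(a, b):                 # placeholder workload
    return torch.cdist(a, b)

device = "cuda" if torch.cuda.is_available() else "cpu"
a = torch.rand(1024, 128, device=device)
b = torch.rand(1024, 128, device=device)
# on CUDA OOM: retries after torch.cuda.empty_cache(), then retries with CPU inputs
d = retry_if_cuda_oom(pairwise_distance)(a, b)
```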
+ """ + obj = pydoc.locate(name) + + # Some cases (e.g. torch.optim.sgd.SGD) not handled correctly + # by pydoc.locate. Try a private function from hydra. + # Should use _locate directly if it's public. + if obj is None: + try: + from hydra.utils import get_method + except ImportError as e: + raise ImportError(f"Cannot dynamically locate object {name}!") from e + else: + obj = get_method(name) # it raises if fails + + return obj diff --git a/src/sts/detectron2/utils/serialize.py b/src/sts/detectron2/utils/serialize.py new file mode 100644 index 0000000000000000000000000000000000000000..96bb153ec82117d062ad4849237d41d9877e7f9c --- /dev/null +++ b/src/sts/detectron2/utils/serialize.py @@ -0,0 +1,29 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import cloudpickle + + +class PicklableWrapper(object): + """ + Wrap an object to make it more picklable, note that it uses + heavy weight serialization libraries that are slower than pickle. + It's best to use it only on closures (which are usually not picklable). + + This is a simplified version of + https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py + """ + + def __init__(self, obj): + self._obj = obj + + def __reduce__(self): + s = cloudpickle.dumps(self._obj) + return cloudpickle.loads, (s,) + + def __call__(self, *args, **kwargs): + return self._obj(*args, **kwargs) + + def __getattr__(self, attr): + # Ensure that the wrapped object can be used seamlessly as the previous object. + if attr not in ["_obj"]: + return getattr(self._obj, attr) + return getattr(self, attr) diff --git a/src/sts/detectron2/utils/testing.py b/src/sts/detectron2/utils/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..23c70b98d9d0d6c18d93a2488661c49c71a0c5fd --- /dev/null +++ b/src/sts/detectron2/utils/testing.py @@ -0,0 +1,132 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import io +import numpy as np +import torch + +from detectron2 import model_zoo +from detectron2.data import DatasetCatalog +from detectron2.data.detection_utils import read_image +from detectron2.modeling import build_model +from detectron2.structures import Boxes, Instances +from detectron2.utils.file_io import PathManager + + +""" +Internal utilities for tests. Don't use except for writing tests. +""" + + +def get_model_no_weights(config_path): + """ + Like model_zoo.get, but do not load any weights (even pretrained) + """ + cfg = model_zoo.get_config(config_path) + if not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + return build_model(cfg) + + +def random_boxes(num_boxes, max_coord=100, device="cpu"): + """ + Create a random Nx4 boxes tensor, with coordinates < max_coord. + """ + boxes = torch.rand(num_boxes, 4, device=device) * (max_coord * 0.5) + boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression + # Note: the implementation of this function in torchvision is: + # boxes[:, 2:] += torch.rand(N, 2) * 100 + # but it does not guarantee non-negative widths/heights constraints: + # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: + boxes[:, 2:] += boxes[:, :2] + return boxes + + +def get_sample_coco_image(tensor=True): + """ + Args: + tensor (bool): if True, returns 3xHxW tensor. + else, returns a HxWx3 numpy array. + + Returns: + an image, in BGR color. 
+ """ + try: + file_name = DatasetCatalog.get("coco_2017_val_100")[0]["file_name"] + if not PathManager.exists(file_name): + raise FileNotFoundError() + except IOError: + # for public CI to run + file_name = "http://images.cocodataset.org/train2017/000000000009.jpg" + ret = read_image(file_name, format="BGR") + if tensor: + ret = torch.from_numpy(np.ascontiguousarray(ret.transpose(2, 0, 1))) + return ret + + +def convert_scripted_instances(instances): + """ + Convert a scripted Instances object to a regular :class:`Instances` object + """ + ret = Instances(instances.image_size) + for name in instances._field_names: + val = getattr(instances, "_" + name, None) + if val is not None: + ret.set(name, val) + return ret + + +def assert_instances_allclose(input, other, *, rtol=1e-5, msg="", size_as_tensor=False): + """ + Args: + input, other (Instances): + size_as_tensor: compare image_size of the Instances as tensors (instead of tuples). + Useful for comparing outputs of tracing. + """ + if not isinstance(input, Instances): + input = convert_scripted_instances(input) + if not isinstance(other, Instances): + other = convert_scripted_instances(other) + + if not msg: + msg = "Two Instances are different! " + else: + msg = msg.rstrip() + " " + + size_error_msg = msg + f"image_size is {input.image_size} vs. {other.image_size}!" + if size_as_tensor: + assert torch.equal( + torch.tensor(input.image_size), torch.tensor(other.image_size) + ), size_error_msg + else: + assert input.image_size == other.image_size, size_error_msg + fields = sorted(input.get_fields().keys()) + fields_other = sorted(other.get_fields().keys()) + assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!" + + for f in fields: + val1, val2 = input.get(f), other.get(f) + if isinstance(val1, Boxes): + # boxes in the range of O(100) and can have a larger tolerance + assert torch.allclose(val1.tensor, val2.tensor, atol=100 * rtol), ( + msg + f"Field {f} differs too much!" + ) + elif isinstance(val1, torch.Tensor): + if val1.dtype.is_floating_point: + mag = torch.abs(val1).max().cpu().item() + assert torch.allclose(val1, val2, atol=mag * rtol), ( + msg + f"Field {f} differs too much!" + ) + else: + assert torch.equal(val1, val2), msg + f"Field {f} is different!" + else: + raise ValueError(f"Don't know how to compare type {type(val1)}") + + +def reload_script_model(module): + """ + Save a jit module and load it back. + Similar to the `getExportImportCopy` function in torch/testing/ + """ + buffer = io.BytesIO() + torch.jit.save(module, buffer) + buffer.seek(0) + return torch.jit.load(buffer) diff --git a/src/sts/detectron2/utils/video_visualizer.py b/src/sts/detectron2/utils/video_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..904ab1f6b7d8abb243ba05b300dd6d9c5e23ab14 --- /dev/null +++ b/src/sts/detectron2/utils/video_visualizer.py @@ -0,0 +1,236 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +import pycocotools.mask as mask_util + +from detectron2.utils.visualizer import ( + ColorMode, + Visualizer, + _create_text_labels, + _PanopticPrediction, +) + +from .colormap import random_color + + +class _DetectedInstance: + """ + Used to store data about detected objects in video frame, + in order to transfer color to objects in the future frames. + + Attributes: + label (int): + bbox (tuple[float]): + mask_rle (dict): + color (tuple[float]): RGB colors in range (0, 1) + ttl (int): time-to-live for the instance. 
For example, if ttl=2, + the instance color can be transferred to objects in the next two frames. + """ + + __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] + + def __init__(self, label, bbox, mask_rle, color, ttl): + self.label = label + self.bbox = bbox + self.mask_rle = mask_rle + self.color = color + self.ttl = ttl + + +class VideoVisualizer: + def __init__(self, metadata, instance_mode=ColorMode.IMAGE): + """ + Args: + metadata (MetadataCatalog): image metadata. + """ + self.metadata = metadata + self._old_instances = [] + assert instance_mode in [ + ColorMode.IMAGE, + ColorMode.IMAGE_BW, + ], "Other mode not supported yet." + self._instance_mode = instance_mode + + def draw_instance_predictions(self, frame, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + frame_visualizer = Visualizer(frame, self.metadata) + num_instances = len(predictions) + if num_instances == 0: + return frame_visualizer.output + + boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + if predictions.has("pred_masks"): + masks = predictions.pred_masks + # mask IOU is not yet enabled + # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) + # assert len(masks_rles) == num_instances + else: + masks = None + + detected = [ + _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + + if self._instance_mode == ColorMode.IMAGE_BW: + # any() returns uint8 tensor + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + (masks.any(dim=0) > 0).numpy() if masks is not None else None + ) + alpha = 0.3 + else: + alpha = 0.5 + + frame_visualizer.overlay_instances( + # boxes=None if masks is not None else boxes, # boxes are a bit distracting + boxes=boxes, + masks=masks, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + ) + + return frame_visualizer.output + + def draw_sem_seg(self, frame, sem_seg, area_threshold=None): + """ + Args: + sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), + each value is the integer label. 
+ area_threshold (Optional[int]): only draw segmentations larger than the threshold + """ + # don't need to do anything special + frame_visualizer = Visualizer(frame, self.metadata) + frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) + return frame_visualizer.output + + def draw_panoptic_seg_predictions( + self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 + ): + frame_visualizer = Visualizer(frame, self.metadata) + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + pred.non_empty_mask() + ) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + frame_visualizer.draw_binary_mask( + mask, + color=mask_color, + text=self.metadata.stuff_classes[category_idx], + alpha=alpha, + area_threshold=area_threshold, + ) + + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return frame_visualizer.output + # draw mask for all instances second + masks, sinfo = list(zip(*all_instances)) + num_instances = len(masks) + masks_rles = mask_util.encode( + np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") + ) + assert len(masks_rles) == num_instances + + category_ids = [x["category_id"] for x in sinfo] + detected = [ + _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + labels = [self.metadata.thing_classes[k] for k in category_ids] + + frame_visualizer.overlay_instances( + boxes=None, + masks=masks, + labels=labels, + keypoints=None, + assigned_colors=colors, + alpha=alpha, + ) + return frame_visualizer.output + + def _assign_colors(self, instances): + """ + Naive tracking heuristics to assign same color to the same instance, + will update the internal state of tracked instances. + + Returns: + list[tuple[float]]: list of colors. 
+ """ + + # Compute iou with either boxes or masks: + is_crowd = np.zeros((len(instances),), dtype=np.bool) + if instances[0].bbox is None: + assert instances[0].mask_rle is not None + # use mask iou only when box iou is None + # because box seems good enough + rles_old = [x.mask_rle for x in self._old_instances] + rles_new = [x.mask_rle for x in instances] + ious = mask_util.iou(rles_old, rles_new, is_crowd) + threshold = 0.5 + else: + boxes_old = [x.bbox for x in self._old_instances] + boxes_new = [x.bbox for x in instances] + ious = mask_util.iou(boxes_old, boxes_new, is_crowd) + threshold = 0.6 + if len(ious) == 0: + ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") + + # Only allow matching instances of the same label: + for old_idx, old in enumerate(self._old_instances): + for new_idx, new in enumerate(instances): + if old.label != new.label: + ious[old_idx, new_idx] = 0 + + matched_new_per_old = np.asarray(ious).argmax(axis=1) + max_iou_per_old = np.asarray(ious).max(axis=1) + + # Try to find match for each old instance: + extra_instances = [] + for idx, inst in enumerate(self._old_instances): + if max_iou_per_old[idx] > threshold: + newidx = matched_new_per_old[idx] + if instances[newidx].color is None: + instances[newidx].color = inst.color + continue + # If an old instance does not match any new instances, + # keep it for the next frame in case it is just missed by the detector + inst.ttl -= 1 + if inst.ttl > 0: + extra_instances.append(inst) + + # Assign random color to newly-detected instances: + for inst in instances: + if inst.color is None: + inst.color = random_color(rgb=True, maximum=1) + self._old_instances = instances[:] + extra_instances + return [d.color for d in instances] diff --git a/src/sts/detectron2/utils/visualizer.py b/src/sts/detectron2/utils/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..add1fec6ab7889bbdd1f8b9056df37efc0d8a5aa --- /dev/null +++ b/src/sts/detectron2/utils/visualizer.py @@ -0,0 +1,1398 @@ +# Edit by Yao Lu +# +# Copyright (c) Facebook, Inc. and its affiliates. 
+import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .colormap import random_color +from shapely.geometry import * + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +def py_cpu_pnms(dets, scores, thresh): + pts = dets + # for i in xrange(dets.shape[0]): + # pts.append([[int(bbox[i, 0]) + info_bbox[i, j], int(bbox[i, 1]) + info_bbox[i, j+1]] for j in xrange(0,28,2)]) + scores = np.array(scores) + order = scores.argsort()[::-1] + areas = np.zeros(scores.shape) + order = scores.argsort()[::-1] + inter_areas = np.zeros((scores.shape[0], scores.shape[0])) + for il in range(len(pts)): + poly = Polygon(pts[il]).buffer(0.001) + areas[il] = poly.area + for jl in range(il, len(pts)): + polyj = Polygon(pts[jl].tolist()).buffer(0.001) + inS = poly.intersection(polyj) + try: + inter_areas[il][jl] = inS.area + except: + import pdb;pdb.set_trace() + inter_areas[jl][il] = inS.area + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
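A small sketch of `GenericMask` with a synthetic binary mask, assuming the module's dependencies (OpenCV, matplotlib, shapely, pycocotools) are installed:

```python
import numpy as np
from detectron2.utils.visualizer import GenericMask

mask = np.zeros((100, 100), dtype=np.uint8)
mask[20:60, 30:80] = 1                            # one rectangular blob
gm = GenericMask(mask, height=100, width=100)
print(gm.area())                                  # 2000 foreground pixels
print(gm.bbox())                                  # XYXY box around the blob
print(len(gm.polygons))                           # contours found by OpenCV
```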
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None and class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + # labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + #luyao + labels = ["{}.{:.0f}".format(l, s * 100) for l, s in zip(labels, scores)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3). + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + # Need to imshow this first so that other patches can be drawn on top + ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + self.fig = fig + self.ax = ax + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + # self.fig.savefig(filepath[:-4]+'.svg', format='svg') + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, implement custom drawing functions with the primitive + methods instead. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): image metadata. + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
+ """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + + def draw_instance_predictions(self, predictions, path): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + #luyao# + # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + rec = predictions.pred_rec if predictions.has("pred_rec") else None + rec_score = predictions.pred_rec_score if predictions.has("pred_rec_score") else None + #luyao# + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + # masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + #luyao# + alpha = 0.8 + else: + colors = None + alpha = 0.77 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + alpha = 0.3 + + self.overlay_instances( + rec=rec, + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + scores=scores, + path=path, + rec_score = rec_score + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg_predictions( + self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 + ): + """ + Draw panoptic prediction results on an image. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image(pred.non_empty_mask()) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + labels = [x["category_id"] for x in annos] + colors = None + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels + ] + names = self.metadata.get("thing_classes", None) + if names: + labels = [names[i] for i in labels] + labels = [ + "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") + for i, a in zip(labels, annos) + ] + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + assert "segments_info" in dic + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + segments_info = dic["segments_info"] + if pan_seg is not None: + pan_seg = torch.Tensor(pan_seg) + self.draw_panoptic_seg_predictions(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + rec=None, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + scores, + path, + rec_score, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + rec = rec + def _decode_recognition(rec): + # CTLABELS = "_0123456789abcdefghijklmnopqrstuvwxyz" + CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + # ctc decoding + last_char = False + s = '' + for c in rec: + c = int(c) + if 0','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + for c in rec: + c = int(c) + if c < 96: + if last_char != c: + if CTLABELS[c-1] in "_0123456789abcdefghijklmnopqrstuvwxyz": + s += CTLABELS[c-1] + last_char = c + else: + last_char = False + return s + + num_instances = None + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. 
+ boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + rec = [rec[idx] for idx in sorted_idxs] if rec is not None else None + # rec_score = [rec_score[idx] for idx in sorted_idxs] if rec is not None else None + scores = [scores[idx] for idx in sorted_idxs] if scores is not None else None + # assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + #luyao# + assigned_colors = [[0,113.985,118.955],[216.75,82.875,24.99],[236.895, 176.97, 31.875],[125.97, 46.92, 141.78],[118.83, 171.87, 47.94],[76.755, 189.975, 237.915],[161.925, 19.89, 46.92],\ + [255,140,0 ],[70,130,180 ],[128,128,0 ],[205,92,92 ],[128,0,128 ],[255,182,193],[255,255,0],[105,105,105],[0,255,255],[0,255,0 ],\ + [210,180,140],[255,0,0 ],[0,139,139],[255,0,255],[127,255,0],[75,0,130],[32,178,170],[255,215,0],[219,112,147],[148,0,211 ],\ + [100,149,237],[175,238,238 ],[143,188,143],[255,255,224 ],[244,164,96],[188,143,143],[192,192,192 ],[220,20,60],[218,112,214],[147,112,219]] + rec = [_decode_recognition(rrec) for rrec in rec] + # assigned_colors = [[1,140/255,0],[30/255,144/255,1],[148/255,0,211/255],[0,1,1],[1,0,0],\ + # [30/255,143/255,1],[0.94,0.5,0.5],[1,1,0],[0.5,0.5,0],[0.823,0.412,0.117],[0.58,0,0.827],[0.5,0,0]\ + # ,[0.82,0.41,0.12],[0.41,0.41,0.41],[0,0.54,0.54],[0.75,0.25,0.65],[0.2,0.6,0.8],[0.74,0,0.3],[0,1.0,0.4],[1,0.5,0.5],[0.5,0.5,1]\ + # ,[0.6,0,1],[0.56,0.56,0.3],[0,1,0],[1.0,0.0,0.4],[0.0,1.0,0.4],[0.0,0.5,1.0],[1,215/255,0]] + poly = [] + for i in range(num_instances): + bb = 1 + if masks is not None: + for segment in masks[i].polygons: + if bb == 1: + poly.append(masks[i].polygons[0].astype(int).reshape(-1,2)) + bb = 0 + keep = py_cpu_pnms(poly,scores,0.5) + alpha = 0.7 + for i in range(num_instances): + if rec[i] == ' ': + continue + if i not in keep: + continue + # color = assigned_colors[i] + # print(i) + color_ = assigned_colors[i%len(assigned_colors)] + color = [x/255 for x in color_] + # if boxes is not None: + # self.draw_box(boxes[i], edge_color=color) + #luyao + # alpha = 0.6 + H, W, _ = self.img.shape + if masks is not None: + for segment in masks[i].polygons: + # segment = polygon2rbox(segment, H, W) + # segment = np.array(segment) + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + if labels is not None: + # first get a box + if boxes is not None: + #luyao# + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + elif masks is not None: + # skip small mask without polygon + if len(masks[i].polygons) == 0: + continue + + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. 
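To make the fork-specific recognition path easier to follow: `_decode_recognition` above performs a CTC-style decode in which indices 1-95 map into the 95 printable-ASCII `CTLABELS` entries, index 0 acts as the blank, consecutive repeats are collapsed, and only underscore, digit, and lowercase characters are kept. A small self-contained sketch of the same idea (the index sequence is made up):

```python
import string

# Illustrative re-implementation of the CTC decode performed by
# _decode_recognition above; the index sequence below is made up.
CTLABELS = [chr(i) for i in range(32, 127)]  # the same 95 printable ASCII chars

def ctc_decode(indices, keep="_" + string.digits + string.ascii_lowercase):
    """Collapse repeated indices, skip the blank (0), keep whitelisted chars."""
    s, last = "", None
    for c in indices:
        c = int(c)
        if 0 < c < 96:
            if c != last:
                ch = CTLABELS[c - 1]
                if ch in keep:
                    s += ch
                last = c
        else:
            last = None  # a blank resets repeat-collapsing
    return s

print(ctc_decode([81, 73, 0, 80, 80]))  # -> "pho"
```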
+ # for small objects, draw text at the side to avoid occlusion + + instance_area = (y1 - y0) * (x1 - x0) + # print(x0,' ',x1,' ',y0,' ',y1,' ',self.output.height,' ', self.output.width) + #luyao# + if y0<5: + text_pos = ((x0+x1)//2,(y0+y1)//2) + #luyao# + # if ( + # instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + # or y1 - y0 < 40 * self.output.scale + # ): + # if y1 >= self.output.height - 5: + # text_pos = (x1, y0) + # else: + # text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 1.0 + * self._default_font_size + ) + self.draw_text( + # labels[i], + # '', + rec[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. 
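As a rough worked example of the label font-size heuristic used in `overlay_instances` above (all numbers are illustrative, not taken from the repo):

```python
import numpy as np

# Made-up numbers, just to see what the formula produces.
H = W = 1000
default_font_size = max(np.sqrt(H * W) // 90, 10 // 1)    # 11.0, as computed in __init__
y0, y1 = 300, 400                                         # vertical extent of one instance
height_ratio = (y1 - y0) / np.sqrt(H * W)                 # 0.1
font_size = np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 1.0 * default_font_size
print(font_size)  # 22.0 -- taller instances get bigger labels, capped at 2x the default
```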
+ """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + # draw keypoint + x, y, prob = keypoint + if prob > _KEYPOINT_THRESHOLD: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0 + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. + """ + if not font_size: + font_size = self._default_font_size + + # print(font_size, self.output.scale) + + # since the text background is dark, we don't want the text to be dark + # color = np.maximum(list(mplc.to_rgb(color)), 0.2) + # color[np.argmax(color)] = max(0.8, np.max(color)) + #luyao# + color = 'w' + # font_size = 7.0 + x, y = position + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + # family="sans-serif", + family="monospace", + # family="serif", + #luyao# + bbox={"facecolor": "black", "alpha": 0.0, "pad": 0.0, "edgecolor": "none"}, + # bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + # verticalalignment="top", + verticalalignment="bottom", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + + rotation=rotation, + #luyao + # fontweight='light' + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. 
Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. + """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + # linewidth = max(self._default_font_size / 16, 1) + # linewidth = max(self._default_font_size / 4, 1) + #luyao# + edge_color=[0.196,0.80,0.196] + alpha = 1.0 + linewidth = 0.7 + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. + """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. + """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. 
+ y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. + linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn in the object's center of mass. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + area_threshold (float): a connected component small than this will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None and has_valid_segment: + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. 
+ for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + #luyao# + # edge_color = [] + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + #luyao# qudiaomaskyanse + # edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. 
+ """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. + """ + return self.output + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = list(map(int, pts)) + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] \ No newline at end of file diff --git a/src/sts/detectron2/utils/visualizer_chn.py b/src/sts/detectron2/utils/visualizer_chn.py new file mode 100644 index 0000000000000000000000000000000000000000..ec1e7a4327fad2f573e16de435bb64466ba000c2 --- /dev/null +++ b/src/sts/detectron2/utils/visualizer_chn.py @@ -0,0 +1,1376 @@ +# Edit by Yao Lu +# +# Copyright (c) Facebook, Inc. and its affiliates. 
+import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .colormap import random_color +from shapely.geometry import * +import pickle +import matplotlib.font_manager as mfm + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +def py_cpu_pnms(dets, scores, thresh): + pts = dets + # for i in xrange(dets.shape[0]): + # pts.append([[int(bbox[i, 0]) + info_bbox[i, j], int(bbox[i, 1]) + info_bbox[i, j+1]] for j in xrange(0,28,2)]) + scores = np.array(scores) + order = scores.argsort()[::-1] + areas = np.zeros(scores.shape) + order = scores.argsort()[::-1] + inter_areas = np.zeros((scores.shape[0], scores.shape[0])) + for il in range(len(pts)): + poly = Polygon(pts[il]).buffer(0.001) + areas[il] = poly.area + for jl in range(il, len(pts)): + polyj = Polygon(pts[jl].tolist()).buffer(0.001) + inS = poly.intersection(polyj) + try: + inter_areas[il][jl] = inS.area + except: + import pdb;pdb.set_trace() + inter_areas[jl][il] = inS.area + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
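`py_cpu_pnms` above is a greedy non-maximum suppression over text polygons: Shapely computes pairwise intersection areas, and any detection whose polygon IoU with an already-kept, higher-scoring detection exceeds the threshold is dropped. A compact sketch of the same procedure on made-up detections:

```python
import numpy as np
from shapely.geometry import Polygon

def polygon_nms(polys, scores, thresh=0.5):
    """Greedy NMS over polygon IoU, mirroring py_cpu_pnms above."""
    # The small buffer guards against invalid / self-intersecting rings.
    shapes = [Polygon(p).buffer(0.001) for p in polys]
    order = np.asarray(scores).argsort()[::-1]
    keep = []
    while order.size > 0:
        i = int(order[0])
        keep.append(i)
        rest = order[1:]
        ious = np.array([
            shapes[i].intersection(shapes[j]).area
            / (shapes[i].area + shapes[j].area - shapes[i].intersection(shapes[j]).area)
            for j in rest
        ])
        order = rest[ious <= thresh] if rest.size else rest
    return keep

# Made-up detections: two heavily overlapping quads and one far-away quad.
polys = [
    [(0, 0), (10, 0), (10, 5), (0, 5)],
    [(1, 0), (11, 0), (11, 5), (1, 5)],
    [(20, 20), (30, 20), (30, 25), (20, 25)],
]
print(polygon_nms(polys, [0.9, 0.8, 0.7]))  # -> [0, 2]; the lower-scoring overlap is dropped
```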
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None and class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + # labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + #luyao + labels = ["{}.{:.0f}".format(l, s * 100) for l, s in zip(labels, scores)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3). + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + # Need to imshow this first so that other patches can be drawn on top + ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + self.fig = fig + self.ax = ax + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + # self.fig.savefig(filepath[:-4]+'.svg', format='svg') + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, implement custom drawing functions with the primitive + methods instead. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): image metadata. + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
+ """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + with open('chn_cls_list.txt', 'rb') as fp: + self.CTLABELS = pickle.load(fp) + + def draw_instance_predictions(self, predictions, path): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + #luyao# + # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + rec = predictions.pred_rec if predictions.has("pred_rec") else None + rec_score = predictions.pred_rec_score if predictions.has("pred_rec_score") else None + #luyao# + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + # masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + #luyao# + alpha = 0.8 + else: + colors = None + alpha = 0.77 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + alpha = 0.3 + + self.overlay_instances( + rec=rec, + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + scores=scores, + path=path, + rec_score = rec_score + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg_predictions( + self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 + ): + """ + Draw panoptic prediction results on an image. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image(pred.non_empty_mask()) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + labels = [x["category_id"] for x in annos] + colors = None + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels + ] + names = self.metadata.get("thing_classes", None) + if names: + labels = [names[i] for i in labels] + labels = [ + "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") + for i, a in zip(labels, annos) + ] + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + assert "segments_info" in dic + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + segments_info = dic["segments_info"] + if pan_seg is not None: + pan_seg = torch.Tensor(pan_seg) + self.draw_panoptic_seg_predictions(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + rec=None, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + scores, + path, + rec_score, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + rec = rec + def _ctc_decode_recognition(rec): + #CTLABELS = "_0123456789abcdefghijklmnopqrstuvwxyz" + # CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + # ctc decoding + s = '' + for c in rec: + c = int(c) + if c < 5461: + s += str(chr(self.CTLABELS[c])) + elif c == 5462: + s += u'' + + return s + num_instances = None + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. 
+ boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + rec = [rec[idx] for idx in sorted_idxs] if rec is not None else None + # rec_score = [rec_score[idx] for idx in sorted_idxs] if rec is not None else None + scores = [scores[idx] for idx in sorted_idxs] if scores is not None else None + # assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + #luyao# + assigned_colors = [[0,113.985,118.955],[216.75,82.875,24.99],[236.895, 176.97, 31.875],[125.97, 46.92, 141.78],[118.83, 171.87, 47.94],[76.755, 189.975, 237.915],[161.925, 19.89, 46.92],\ + [255,140,0 ],[70,130,180 ],[128,128,0 ],[205,92,92 ],[128,0,128 ],[255,182,193],[255,255,0],[105,105,105],[0,255,255],[0,255,0 ],\ + [210,180,140],[255,0,0 ],[0,139,139],[255,0,255],[127,255,0],[75,0,130],[32,178,170],[255,215,0],[219,112,147],[148,0,211 ],\ + [100,149,237],[175,238,238 ],[143,188,143],[255,255,224 ],[244,164,96],[188,143,143],[192,192,192 ],[220,20,60],[218,112,214],[147,112,219]] + rec = [_ctc_decode_recognition(rrec) for rrec in rec] + # assigned_colors = [[1,140/255,0],[30/255,144/255,1],[148/255,0,211/255],[0,1,1],[1,0,0],\ + # [30/255,143/255,1],[0.94,0.5,0.5],[1,1,0],[0.5,0.5,0],[0.823,0.412,0.117],[0.58,0,0.827],[0.5,0,0]\ + # ,[0.82,0.41,0.12],[0.41,0.41,0.41],[0,0.54,0.54],[0.75,0.25,0.65],[0.2,0.6,0.8],[0.74,0,0.3],[0,1.0,0.4],[1,0.5,0.5],[0.5,0.5,1]\ + # ,[0.6,0,1],[0.56,0.56,0.3],[0,1,0],[1.0,0.0,0.4],[0.0,1.0,0.4],[0.0,0.5,1.0],[1,215/255,0]] + poly = [] + alpha = 0.4 + for i in range(num_instances): + if masks is not None: + poly.append(masks[i].polygons[0].astype(int).reshape(-1,2)) + keep = py_cpu_pnms(poly,scores,0.5) + for i in range(num_instances): + # if rec[i] == ' ': + # continue + if i not in keep: + continue + # color = assigned_colors[i] + # print(i) + color_ = assigned_colors[i%len(assigned_colors)] + color = [x/255 for x in color_] + # if boxes is not None: + # self.draw_box(boxes[i], edge_color=color) + #luyao + # alpha = 0.6 + H, W, _ = self.img.shape + if masks is not None: + for segment in masks[i].polygons: + segment = polygon2rbox(segment, H, W) + segment = np.array(segment) + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + if labels is not None: + # first get a box + if boxes is not None: + #luyao# + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + elif masks is not None: + # skip small mask without polygon + if len(masks[i].polygons) == 0: + continue + + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. 
+ # for small objects, draw text at the side to avoid occlusion + + instance_area = (y1 - y0) * (x1 - x0) + # print(x0,' ',x1,' ',y0,' ',y1,' ',self.output.height,' ', self.output.width) + #luyao# + if y0<5: + text_pos = ((x0+x1)//2,(y0+y1)//2) + #luyao# + # if ( + # instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + # or y1 - y0 < 40 * self.output.scale + # ): + # if y1 >= self.output.height - 5: + # text_pos = (x1, y0) + # else: + # text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 1.0 + * self._default_font_size + ) + self.draw_text( + # labels[i], + # '', + rec[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. 
+ """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + # draw keypoint + x, y, prob = keypoint + if prob > _KEYPOINT_THRESHOLD: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0 + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. + """ + if not font_size: + font_size = self._default_font_size + + # print(font_size, self.output.scale) + + # since the text background is dark, we don't want the text to be dark + # color = np.maximum(list(mplc.to_rgb(color)), 0.2) + # color[np.argmax(color)] = max(0.8, np.max(color)) + #luyao# + color = 'w' + # font_size = 7.0 + x, y = position + font_path = "simsun.ttc" + prop = mfm.FontProperties(fname=font_path) + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + # family="sans-serif", + family="monospace", + # family="serif", + #luyao# + bbox={"facecolor": "black", "alpha": 0.0, "pad": 0.0, "edgecolor": "none"}, + # bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + # verticalalignment="top", + verticalalignment="bottom", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + + rotation=rotation, + fontproperties=prop, + #luyao + # fontweight='light' + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. 
Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. + """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + # linewidth = max(self._default_font_size / 16, 1) + # linewidth = max(self._default_font_size / 4, 1) + #luyao# + edge_color=[0.196,0.80,0.196] + alpha = 1.0 + linewidth = 0.7 + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. + """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. 
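+
+ Example (sketch; ``vis`` is assumed to be a ``Visualizer`` instance)::
+
+     vis.draw_circle((120, 80), color=(1.0, 0.0, 0.0), radius=5)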
+ """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. + y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. + linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn in the object's center of mass. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + area_threshold (float): a connected component small than this will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None and has_valid_segment: + # TODO sometimes drawn on wrong objects. the heuristics here can improve. 
+ lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + #luyao# + # edge_color = [] + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + #luyao# qudiaomaskyanse + # edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. 
Each value in the tuple is in the [0.0, 1.0] range. + """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. + """ + return self.output + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = list(map(int, pts)) + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] diff --git a/src/sts/detectron2/utils/visualizer_vintext.py b/src/sts/detectron2/utils/visualizer_vintext.py new file mode 100644 index 0000000000000000000000000000000000000000..175c5733c67383ab78bb8fa36bcade3736214fc6 --- /dev/null +++ b/src/sts/detectron2/utils/visualizer_vintext.py @@ -0,0 +1,1544 @@ +# Edit by Yao Lu +# +# Copyright (c) Facebook, Inc. 
and its affiliates. +import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes +from detectron2.utils.file_io import PathManager + +from .colormap import random_color +from shapely.geometry import * +import matplotlib.font_manager as mfm + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +def py_cpu_pnms(dets, scores, thresh): + pts = dets + # for i in xrange(dets.shape[0]): + # pts.append([[int(bbox[i, 0]) + info_bbox[i, j], int(bbox[i, 1]) + info_bbox[i, j+1]] for j in xrange(0,28,2)]) + scores = np.array(scores) + order = scores.argsort()[::-1] + areas = np.zeros(scores.shape) + order = scores.argsort()[::-1] + inter_areas = np.zeros((scores.shape[0], scores.shape[0])) + for il in range(len(pts)): + poly = Polygon(pts[il]).buffer(0.001) + areas[il] = poly.area + for jl in range(il, len(pts)): + polyj = Polygon(pts[jl].tolist()).buffer(0.001) + inS = poly.intersection(polyj) + try: + inter_areas[il][jl] = inS.area + except: + import pdb;pdb.set_trace() + inter_areas[jl][il] = inS.area + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + + ovr = inter_areas[i][order[1:]] / ((areas[i]) + areas[order[1:]] - inter_areas[i][order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. + mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + #res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. + # We add 0.5 to turn them into real-value coordinate space. A better solution + # would be to first +0.5 and then dilate the returned polygon by 0.5. + res = [x + 0.5 for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + def __init__(self, panoptic_seg, segments_info, metadata=None): + if segments_info is None: + assert metadata is not None + # If "segments_info" is None, we assume "panoptic_img" is a + # H*W int32 image storing the panoptic_id in the format of + # category_id * label_divisor + instance_id. We reserve -1 for + # VOID label. + label_divisor = metadata.label_divisor + segments_info = [] + for panoptic_label in np.unique(panoptic_seg.numpy()): + if panoptic_label == -1: + # VOID region. 
+ continue + pred_class = panoptic_label // label_divisor + isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() + segments_info.append( + { + "id": int(panoptic_label), + "category_id": int(pred_class), + "isthing": bool(isthing), + } + ) + del metadata + + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None and class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + # labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + #luyao + labels = ["{}.{:.0f}".format(l, s * 100) for l, s in zip(labels, scores)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3). + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + # Need to imshow this first so that other patches can be drawn on top + ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") + + self.fig = fig + self.ax = ax + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + self.fig.savefig(filepath) + # self.fig.savefig(filepath[:-4]+'.svg', format='svg') + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + return rgb.astype("uint8") + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, implement custom drawing functions with the primitive + methods instead. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + # TODO implement a fast, rasterized version using OpenCV + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. + metadata (Metadata): image metadata. + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. 
+ """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + + def draw_instance_predictions(self, predictions, path): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes if predictions.has("pred_classes") else None + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + #luyao# + # labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + rec = predictions.pred_rec if predictions.has("pred_rec") else None + rec_score = predictions.pred_rec_score if predictions.has("pred_rec_score") else None + #luyao# + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + # masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + #luyao# + alpha = 0.8 + else: + colors = None + alpha = 0.77 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + if predictions.has("pred_masks") + else None + ) + alpha = 0.3 + + self.overlay_instances( + rec=rec, + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + scores=scores, + path=path, + rec_score = rec_score + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg_predictions( + self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 + ): + """ + Draw panoptic prediction results on an image. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image(pred.non_empty_mask()) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [ + BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) + if len(x["bbox"]) == 4 + else x["bbox"] + for x in annos + ] + + labels = [x["category_id"] for x in annos] + colors = None + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels + ] + names = self.metadata.get("thing_classes", None) + if names: + labels = [names[i] for i in labels] + labels = [ + "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") + for i, a in zip(labels, annos) + ] + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + + pan_seg = dic.get("pan_seg", None) + if pan_seg is None and "pan_seg_file_name" in dic: + assert "segments_info" in dic + with PathManager.open(dic["pan_seg_file_name"], "rb") as f: + pan_seg = Image.open(f) + pan_seg = np.asarray(pan_seg) + from panopticapi.utils import rgb2id + + pan_seg = rgb2id(pan_seg) + segments_info = dic["segments_info"] + if pan_seg is not None: + pan_seg = torch.Tensor(pan_seg) + self.draw_panoptic_seg_predictions(pan_seg, segments_info, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + rec=None, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5, + scores, + path, + rec_score, + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + rec = rec + def _ctc_decode_recognition(rec): + # CTLABELS = "_0123456789abcdefghijklmnopqrstuvwxyz" + # CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~'] + CTLABELS = [ + " ", + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "[", + "\\", + "]", + "^", + "_", + "`", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "{", + "|", + "}", + "~", + "ˋ", + "ˊ", + "﹒", + "ˀ", + "˜", + "ˇ", + "ˆ", + "˒", + "‑", + ] + # ctc decoding + last_char = False + s = '' + for c in rec: + c = int(c) + if 0= self.output.height - 5: + # text_pos = (x1, y0) + # else: + # text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 1.0 + * self._default_font_size + ) + self.draw_text( + # labels[i], + # '', + rec[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. 
This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. + """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + # draw keypoint + x, y, prob = keypoint + if prob > _KEYPOINT_THRESHOLD: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0 + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. 
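+
+ Example (sketch; note this variant always renders white text and loads the
+ Vietnamese font ``VNFREE.ttf`` from the working directory, so the ``color``
+ argument is effectively ignored)::
+
+     vis.draw_text("Cà phê", (100, 200), font_size=12)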
+ """ + if not font_size: + font_size = self._default_font_size + + # print(font_size, self.output.scale) + + # since the text background is dark, we don't want the text to be dark + # color = np.maximum(list(mplc.to_rgb(color)), 0.2) + # color[np.argmax(color)] = max(0.8, np.max(color)) + #luyao# + color = 'w' + # font_size = 7.0 + x, y = position + font_path = "VNFREE.ttf" + prop = mfm.FontProperties(fname=font_path) + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + # family="sans-serif", + family="monospace", + # family="serif", + #luyao# + bbox={"facecolor": "black", "alpha": 0.0, "pad": 0.0, "edgecolor": "none"}, + # bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + # verticalalignment="top", + verticalalignment="bottom", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + + rotation=rotation, + fontproperties=prop + #luyao + # fontweight='light' + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. + """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + # linewidth = max(self._default_font_size / 16, 1) + # linewidth = max(self._default_font_size / 4, 1) + #luyao# + edge_color=[0.196,0.80,0.196] + alpha = 1.0 + linewidth = 0.7 + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. 
+ """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. + """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. + y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. + linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn in the object's center of mass. + alpha (float): blending efficient. Smaller values lead to more transparent masks. 
+ area_threshold (float): a connected component small than this will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + # TODO: Use Path/PathPatch to draw vector graphics: + # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) + + if text is not None and has_valid_segment: + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + #luyao# + # edge_color = [] + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + #luyao# qudiaomaskyanse + # edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. 
+ + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. + """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. + """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. 
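+
+ Example (sketch of the usual end of a visualization pass)::
+
+     out = vis.get_output()
+     rgb = out.get_image()        # (H, W, 3) uint8, RGB
+     out.save("visualized.jpg")   # hypothetical output path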
+ """ + return self.output + +def polygon2rbox(polygon, image_height, image_width): + poly = np.array(polygon).reshape((-1, 2)).astype(np.float32) + rect = cv2.minAreaRect(poly) + corners = cv2.boxPoints(rect) + corners = np.array(corners, dtype="int") + pts = get_tight_rect(corners, 0, 0, image_height, image_width, 1) + pts = list(map(int, pts)) + return pts + +def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): + points = list(points) + ps = sorted(points, key=lambda x: x[0]) + + if ps[1][1] > ps[0][1]: + px1 = ps[0][0] * scale + start_x + py1 = ps[0][1] * scale + start_y + px4 = ps[1][0] * scale + start_x + py4 = ps[1][1] * scale + start_y + else: + px1 = ps[1][0] * scale + start_x + py1 = ps[1][1] * scale + start_y + px4 = ps[0][0] * scale + start_x + py4 = ps[0][1] * scale + start_y + if ps[3][1] > ps[2][1]: + px2 = ps[2][0] * scale + start_x + py2 = ps[2][1] * scale + start_y + px3 = ps[3][0] * scale + start_x + py3 = ps[3][1] * scale + start_y + else: + px2 = ps[3][0] * scale + start_x + py2 = ps[3][1] * scale + start_y + px3 = ps[2][0] * scale + start_x + py3 = ps[2][1] * scale + start_y + + px1 = min(max(px1, 1), image_width - 1) + px2 = min(max(px2, 1), image_width - 1) + px3 = min(max(px3, 1), image_width - 1) + px4 = min(max(px4, 1), image_width - 1) + py1 = min(max(py1, 1), image_height - 1) + py2 = min(max(py2, 1), image_height - 1) + py3 = min(max(py3, 1), image_height - 1) + py4 = min(max(py4, 1), image_height - 1) + return [px1, py1, px2, py2, px3, py3, px4, py4] + +dictionary = "aàáạảãâầấậẩẫăằắặẳẵAÀÁẠẢÃĂẰẮẶẲẴÂẦẤẬẨẪeèéẹẻẽêềếệểễEÈÉẸẺẼÊỀẾỆỂỄoòóọỏõôồốộổỗơờớợởỡOÒÓỌỎÕÔỒỐỘỔỖƠỜỚỢỞỠiìíịỉĩIÌÍỊỈĨuùúụủũưừứựửữƯỪỨỰỬỮUÙÚỤỦŨyỳýỵỷỹYỲÝỴỶỸ" + + +def make_groups(): + groups = [] + i = 0 + while i < len(dictionary) - 5: + group = [c for c in dictionary[i : i + 6]] + i += 6 + groups.append(group) + return groups + + +groups = make_groups() + +TONES = ["", "ˋ", "ˊ", "﹒", "ˀ", "˜"] +SOURCES = ["ă", "â", "Ă", "Â", "ê", "Ê", "ô", "ơ", "Ô", "Ơ", "ư", "Ư", "Đ", "đ"] +TARGETS = ["aˇ", "aˆ", "Aˇ", "Aˆ", "eˆ", "Eˆ", "oˆ", "o˒", "Oˆ", "O˒", "u˒", "U˒", "D-", "d‑"] + + +def correct_tone_position(word): + word = word[:-1] + if len(word) < 2: + pass + first_ord_char = "" + second_order_char = "" + for char in word: + for group in groups: + if char in group: + second_order_char = first_ord_char + first_ord_char = group[0] + if word[-1] == first_ord_char and second_order_char != "": + pair_chars = ["qu", "Qu", "qU", "QU", "gi", "Gi", "gI", "GI"] + for pair in pair_chars: + if pair in word and second_order_char in ["u", "U", "i", "I"]: + return first_ord_char + return second_order_char + return first_ord_char + + +def decoder(recognition): + for char in TARGETS: + recognition = recognition.replace(char, SOURCES[TARGETS.index(char)]) + if len(recognition) < 1: + return recognition + if recognition[-1] in TONES: + if len(recognition) < 2: + return recognition + replace_char = correct_tone_position(recognition) + tone = recognition[-1] + recognition = recognition[:-1] + for group in groups: + if replace_char in group: + recognition = recognition.replace(replace_char, group[TONES.index(tone)]) + return recognition diff --git a/src/sts/dev/README.md b/src/sts/dev/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bec811ad002a016f2137d9d0ea61c27ee5e78992 --- /dev/null +++ b/src/sts/dev/README.md @@ -0,0 +1,7 @@ + +## Some scripts for developers to use, include: + +- `linter.sh`: lint the codebase before commit. 
+- `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. + Note that these tests require 2 GPUs. +- `parse_results.sh`: parse results from a log file. diff --git a/src/sts/dev/linter.sh b/src/sts/dev/linter.sh new file mode 100644 index 0000000000000000000000000000000000000000..40e8664b230a5847fca044a5acb5e944096401ac --- /dev/null +++ b/src/sts/dev/linter.sh @@ -0,0 +1,41 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. + +# Run this script at project root by "./dev/linter.sh" before you commit + +{ + black --version | grep -E "20.8b1" > /dev/null +} || { + echo "Linter requires 'black==20.8b1' !" + exit 1 +} + +ISORT_VERSION=$(isort --version-number) +if [[ "$ISORT_VERSION" != 4.3* ]]; then + echo "Linter requires isort==4.3.21 !" + exit 1 +fi + +set -v + +echo "Running isort ..." +isort -y -sp . --atomic + +echo "Running black ..." +black -l 100 . + +echo "Running flake8 ..." +if [ -x "$(command -v flake8-3)" ]; then + flake8-3 . +else + python3 -m flake8 . +fi + +# echo "Running mypy ..." +# Pytorch does not have enough type annotations +# mypy detectron2/solver detectron2/structures detectron2/config + +echo "Running clang-format ..." +find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i + +command -v arc > /dev/null && arc lint diff --git a/src/sts/dev/packaging/README.md b/src/sts/dev/packaging/README.md new file mode 100644 index 0000000000000000000000000000000000000000..afcb67262dc623d1733d9e3a001ca657ac4ad53a --- /dev/null +++ b/src/sts/dev/packaging/README.md @@ -0,0 +1,17 @@ + +## To build a cu101 wheel for release: + +``` +$ nvidia-docker run -it --storage-opt "size=20GB" --name pt pytorch/manylinux-cuda101 +# inside the container: +# git clone https://github.com/facebookresearch/detectron2/ +# cd detectron2 +# export CU_VERSION=cu101 D2_VERSION_SUFFIX= PYTHON_VERSION=3.7 PYTORCH_VERSION=1.6 +# ./dev/packaging/build_wheel.sh +``` + +## To build all wheels for `CUDA {9.2,10.0,10.1}` x `Python {3.6,3.7,3.8}`: +``` +./dev/packaging/build_all_wheels.sh +./dev/packaging/gen_wheel_index.sh /path/to/wheels +``` diff --git a/src/sts/dev/packaging/build_all_wheels.sh b/src/sts/dev/packaging/build_all_wheels.sh new file mode 100644 index 0000000000000000000000000000000000000000..00bdb9e595e691f8dbd2539f03a9b10dab97f60f --- /dev/null +++ b/src/sts/dev/packaging/build_all_wheels.sh @@ -0,0 +1,70 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. + +[[ -d "dev/packaging" ]] || { + echo "Please run this script at detectron2 root!" + exit 1 +} + +build_one() { + cu=$1 + pytorch_ver=$2 + + case "$cu" in + cu*) + container_name=manylinux-cuda${cu/cu/} + ;; + cpu) + container_name=manylinux-cuda101 + ;; + *) + echo "Unrecognized cu=$cu" + exit 1 + ;; + esac + + echo "Launching container $container_name ..." + container_id="$container_name"_"$cu"_"$pytorch_ver" + + py_versions=(3.6 3.7 3.8) + if [[ $pytorch_ver == "1.8" ]]; then + py_versions+=(3.9) + fi + + for py in "${py_versions[@]}"; do + docker run -itd \ + --name "$container_id" \ + --mount type=bind,source="$(pwd)",target=/detectron2 \ + pytorch/$container_name + + cat </dev/null 2>&1 && pwd )" +. "$script_dir/pkg_helpers.bash" + +echo "Build Settings:" +echo "CU_VERSION: $CU_VERSION" # e.g. cu101 +echo "D2_VERSION_SUFFIX: $D2_VERSION_SUFFIX" # e.g. +cu101 or "" +echo "PYTHON_VERSION: $PYTHON_VERSION" # e.g. 3.6 +echo "PYTORCH_VERSION: $PYTORCH_VERSION" # e.g. 
1.4 + +setup_cuda +setup_wheel_python + +yum install ninja-build -y +ln -sv /usr/bin/ninja-build /usr/bin/ninja || true + +pip_install pip numpy -U +pip_install "torch==$PYTORCH_VERSION" \ + -f https://download.pytorch.org/whl/"$CU_VERSION"/torch_stable.html + +# use separate directories to allow parallel build +BASE_BUILD_DIR=build/$CU_VERSION-py$PYTHON_VERSION-pt$PYTORCH_VERSION +python setup.py \ + build -b "$BASE_BUILD_DIR" \ + bdist_wheel -b "$BASE_BUILD_DIR/build_dist" -d "wheels/$CU_VERSION/torch$PYTORCH_VERSION" +rm -rf "$BASE_BUILD_DIR" diff --git a/src/sts/dev/packaging/gen_install_table.py b/src/sts/dev/packaging/gen_install_table.py new file mode 100644 index 0000000000000000000000000000000000000000..bc7cf5aad4d6d320c40c4e28a663ccf4a6fe7d13 --- /dev/null +++ b/src/sts/dev/packaging/gen_install_table.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# -*- coding: utf-8 -*- + +import argparse + +template = """
<details><summary> install </summary><pre><code>\
+python -m pip install detectron2{d2_version} -f \\
+  https://dl.fbaipublicfiles.com/detectron2/wheels/{cuda}/torch{torch}/index.html
+</code></pre> </details>
""" +CUDA_SUFFIX = { + "11.1": "cu111", + "11.0": "cu110", + "10.2": "cu102", + "10.1": "cu101", + "10.0": "cu100", + "9.2": "cu92", + "cpu": "cpu", +} + + +def gen_header(torch_versions): + return '' + "".join( + [ + ''.format(t) + for t in torch_versions + ] + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--d2-version", help="detectron2 version number, default to empty") + args = parser.parse_args() + d2_version = f"=={args.d2_version}" if args.d2_version else "" + + all_versions = ( + [("1.6", k) for k in ["10.2", "10.1", "9.2", "cpu"]] + + [("1.7", k) for k in ["11.0", "10.2", "10.1", "9.2", "cpu"]] + + [("1.8", k) for k in ["11.1", "10.2", "10.1", "cpu"]] + ) + + torch_versions = sorted({k[0] for k in all_versions}, key=float, reverse=True) + cuda_versions = sorted( + {k[1] for k in all_versions}, key=lambda x: float(x) if x != "cpu" else 0, reverse=True + ) + + table = gen_header(torch_versions) + for cu in cuda_versions: + table += f""" """ + cu_suffix = CUDA_SUFFIX[cu] + for torch in torch_versions: + if (torch, cu) in all_versions: + cell = template.format(d2_version=d2_version, cuda=cu_suffix, torch=torch) + else: + cell = "" + table += f""" """ + table += "" + table += "
</tbody></table>
" + print(table) diff --git a/src/sts/dev/packaging/gen_wheel_index.sh b/src/sts/dev/packaging/gen_wheel_index.sh new file mode 100644 index 0000000000000000000000000000000000000000..a5bb0dd990bde08bc2cae9442bb1c7486c29e4e1 --- /dev/null +++ b/src/sts/dev/packaging/gen_wheel_index.sh @@ -0,0 +1,45 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. + + +root=$1 +if [[ -z "$root" ]]; then + echo "Usage: ./gen_wheel_index.sh /path/to/wheels" + exit +fi + +export LC_ALL=C # reproducible sort +# NOTE: all sort in this script might not work when xx.10 is released + +index=$root/index.html + +cd "$root" +for cu in cpu cu92 cu100 cu101 cu102 cu110 cu111; do + cd "$root/$cu" + echo "Creating $PWD/index.html ..." + # First sort by torch version, then stable sort by d2 version with unique. + # As a result, the latest torch version for each d2 version is kept. + for whl in $(find -type f -name '*.whl' -printf '%P\n' \ + | sort -k 1 -r | sort -t '/' -k 2 --stable -r --unique); do + echo "$whl
" + done > index.html + + + for torch in torch*; do + cd "$root/$cu/$torch" + + # list all whl for each cuda,torch version + echo "Creating $PWD/index.html ..." + for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort -r); do + echo "$whl
" + done > index.html + done +done + +cd "$root" +# Just list everything: +echo "Creating $index ..." +for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort -r); do + echo "$whl
" +done > "$index" + diff --git a/src/sts/dev/packaging/pkg_helpers.bash b/src/sts/dev/packaging/pkg_helpers.bash new file mode 100644 index 0000000000000000000000000000000000000000..65c6114ae6943feca258dca6ace99c221c9bbb55 --- /dev/null +++ b/src/sts/dev/packaging/pkg_helpers.bash @@ -0,0 +1,71 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. + +# Function to retry functions that sometimes timeout or have flaky failures +retry () { + $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) +} +# Install with pip a bit more robustly than the default +pip_install() { + retry pip install --progress-bar off "$@" +} + + +setup_cuda() { + # Now work out the CUDA settings + # Like other torch domain libraries, we choose common GPU architectures only. + # See more details at https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py#L1363 + export FORCE_CUDA=1 + case "$CU_VERSION" in + cu112) + export CUDA_HOME=/usr/local/cuda-11.2/ + export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX;8.0;8.6+PTX" + ;; + cu111) + export CUDA_HOME=/usr/local/cuda-11.1/ + export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX;8.0;8.6+PTX" + ;; + cu110) + export CUDA_HOME=/usr/local/cuda-11.0/ + export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX;8.0+PTX" + ;; + cu102) + export CUDA_HOME=/usr/local/cuda-10.2/ + export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX" + ;; + cu101) + export CUDA_HOME=/usr/local/cuda-10.1/ + export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX" + ;; + cu100) + export CUDA_HOME=/usr/local/cuda-10.0/ + export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX" + ;; + cu92) + export CUDA_HOME=/usr/local/cuda-9.2/ + export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0+PTX" + ;; + cpu) + unset FORCE_CUDA + export CUDA_VISIBLE_DEVICES= + ;; + *) + echo "Unrecognized CU_VERSION=$CU_VERSION" + exit 1 + ;; + esac +} + +setup_wheel_python() { + case "$PYTHON_VERSION" in + 3.6) python_abi=cp36-cp36m ;; + 3.7) python_abi=cp37-cp37m ;; + 3.8) python_abi=cp38-cp38 ;; + 3.9) python_abi=cp39-cp39 ;; + *) + echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" + exit 1 + ;; + esac + export PATH="/opt/python/$python_abi/bin:$PATH" +} diff --git a/src/sts/dev/parse_results.sh b/src/sts/dev/parse_results.sh new file mode 100644 index 0000000000000000000000000000000000000000..80768a4005753447c49339790fe66c9b82a80aaf --- /dev/null +++ b/src/sts/dev/parse_results.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. + +# A shell script that parses metrics from the log file. +# Make it easier for developers to track performance of models. + +LOG="$1" + +if [[ -z "$LOG" ]]; then + echo "Usage: $0 /path/to/log/file" + exit 1 +fi + +# [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) +# [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / img per device, on 8 devices) +# [12/15 11:49:03] inference INFO: Total inference pure compute time: ..... 
+ +# training time +trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') +echo "Training speed: $trainspeed s/it" + +# inference time: there could be multiple inference during training +inferencespeed=$(grep -o 'Total inference pure.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) +echo "Inference speed: $inferencespeed s/it" + +# [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 +memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') +echo "Training memory: $memory MB" + +echo "Easy to copypaste:" +echo "$trainspeed","$inferencespeed","$memory" + +echo "------------------------------" + +# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox +# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl +# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 +# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm +# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl +# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 + +echo "COCO Results:" +num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) +# each task has 3 lines +grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) diff --git a/src/sts/dev/run_inference_tests.sh b/src/sts/dev/run_inference_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..bc9dcc56f06f79fc5efa42c04ffdc07c2787e3ac --- /dev/null +++ b/src/sts/dev/run_inference_tests.sh @@ -0,0 +1,44 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. + +BIN="python tools/train_net.py" +OUTPUT="inference_test_output" +NUM_GPUS=2 + +CFG_LIST=( "${@:1}" ) + +if [ ${#CFG_LIST[@]} -eq 0 ]; then + CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) +fi + +echo "========================================================================" +echo "Configs to run:" +echo "${CFG_LIST[@]}" +echo "========================================================================" + + +for cfg in "${CFG_LIST[@]}"; do + echo "========================================================================" + echo "Running $cfg ..." + echo "========================================================================" + $BIN \ + --eval-only \ + --num-gpus $NUM_GPUS \ + --config-file "$cfg" \ + OUTPUT_DIR $OUTPUT + rm -rf $OUTPUT +done + + +echo "========================================================================" +echo "Running demo.py ..." +echo "========================================================================" +DEMO_BIN="python demo/demo.py" +COCO_DIR=datasets/coco/val2014 +mkdir -pv $OUTPUT + +set -v + +$DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ + --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT +rm -rf $OUTPUT diff --git a/src/sts/dev/run_instant_tests.sh b/src/sts/dev/run_instant_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..9fd9ba0c239d3e982c17711c9db872de3730decf --- /dev/null +++ b/src/sts/dev/run_instant_tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. 
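+# Run a few training iterations for each configs/quick_schedules/*instant_test.yaml config
+# (or for the configs passed as arguments) on $NUM_GPUS GPUs, then delete the output dir.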
+ +BIN="python tools/train_net.py" +OUTPUT="instant_test_output" +NUM_GPUS=2 + +CFG_LIST=( "${@:1}" ) +if [ ${#CFG_LIST[@]} -eq 0 ]; then + CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) +fi + +echo "========================================================================" +echo "Configs to run:" +echo "${CFG_LIST[@]}" +echo "========================================================================" + +for cfg in "${CFG_LIST[@]}"; do + echo "========================================================================" + echo "Running $cfg ..." + echo "========================================================================" + $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ + SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ + OUTPUT_DIR "$OUTPUT" + rm -rf "$OUTPUT" +done + diff --git a/src/sts/dist/detectron2-0.4-py3.8-linux-x86_64.egg b/src/sts/dist/detectron2-0.4-py3.8-linux-x86_64.egg new file mode 100644 index 0000000000000000000000000000000000000000..d42e498e213e19e7776c31a76874c45fcf7385cc --- /dev/null +++ b/src/sts/dist/detectron2-0.4-py3.8-linux-x86_64.egg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c987d3549783f79ee5658d644895669900b886c34cd339c89b10a7dffaa3ac55 +size 7329147 diff --git a/src/sts/projects/SWINTS/LME/MaskLoader.py b/src/sts/projects/SWINTS/LME/MaskLoader.py new file mode 100644 index 0000000000000000000000000000000000000000..389724e8aadf2a28e8e88bf6f052eb9189184f28 --- /dev/null +++ b/src/sts/projects/SWINTS/LME/MaskLoader.py @@ -0,0 +1,83 @@ +# coding:utf-8 + +import os +import json +import numpy as np + +import torch.utils.data as data + +from detectron2.structures import ( + Boxes, + PolygonMasks, + BoxMode +) + + +DATASETS = { + "coco_2017_train": { + "img_dir": "coco/train2017", + "ann_file": "coco/annotations/instances_train2017.json" + }, + "coco_2017_val": { + "img_dir": "coco/val2017", + "ann_file": "coco/annotations/instances_val2017.json" + } +} + + +class MaskLoader(data.Dataset): + """ + Dataloader for Local Mask. + + Arguments: + root (string): filepath to dataset folder. + dataset (string): mask to use (eg. 'train', 'val'). + size (tuple): The size used for train/val (height, width). + transform (callable, optional): transformation to perform on the input mask. + + """ + + def __init__(self, root="datasets", dataset="coco_2017_train", size=28, transform=False): + self.root = root + self.dataset = dataset + self.transform = transform + + if isinstance(size, int): + self.size = size + else: + raise TypeError + + data_info = DATASETS[dataset] + img_dir, ann_file = data_info['img_dir'], data_info['ann_file'] + img_dir = os.path.join(self.root, img_dir) # actually we do not use it. + ann_file = os.path.join(self.root, ann_file) + + with open(ann_file, 'r') as f: + anns = json.load(f) + anns = anns['annotations'] + coco = list() + for ann in anns: + if ann.get('iscrowd', 0) == 0: + coco.append(ann) + self.coco = coco + print("Removed {} images with no usable annotations. {} images left.".format( + len(anns) - len(self.coco), len(self.coco))) + + def __len__(self): + return len(self.coco) + + def __getitem__(self, index): + ann = self.coco[index] + + # bbox transform. + bbox = np.array([ann["bbox"]]) # xmin, ymin, w, h + bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) # x1y1x2y2 + bbox = Boxes(bbox) + + # label + + # mask transform. 
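+ # PolygonMasks wraps the COCO-style polygon segmentation; crop_and_resize then
+ # rasterizes it inside its XYXY box to a size x size boolean mask, cast to float below.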
+ mask = PolygonMasks([ann["segmentation"]]) + mask = mask.crop_and_resize(bbox.tensor, self.size).float() + + return mask diff --git a/src/sts/projects/SWINTS/LME/__init__.py b/src/sts/projects/SWINTS/LME/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f2869e4def5b05af3a2bd250b535dd2ba06c73d0 --- /dev/null +++ b/src/sts/projects/SWINTS/LME/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +from .MaskLoader import MaskLoader +from .utils import inverse_sigmoid, direct_sigmoid, IOUMetric, transform, inverse_transform + +__all__ = ["MaskLoader", "IOUMetric", + "inverse_sigmoid", "direct_sigmoid", + "transform", "inverse_transform"] diff --git a/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_100_siz28.npz b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_100_siz28.npz new file mode 100644 index 0000000000000000000000000000000000000000..cddfedff741109d77330655b708d3665f665839c --- /dev/null +++ b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_100_siz28.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e558053fcfe574fd18154f9627c9853c7d91a0483e535fd2995ea54c912cd709 +size 318576 diff --git a/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_40_siz28.npz b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_40_siz28.npz new file mode 100644 index 0000000000000000000000000000000000000000..8a6ea7a46f5a869c3120281c404e2875f838827a --- /dev/null +++ b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_40_siz28.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd94c261f4518d78bb19a983e55ac8fb3a0e7a8a3ccd26ff15976c7f498210f6 +size 129936 diff --git a/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz new file mode 100644 index 0000000000000000000000000000000000000000..d4fff044e2fcc16ec0905f773368de4277e06207 --- /dev/null +++ b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8bab50c273548af4741716d0e08978c74847f026c8d84eb494bb377c0c48e21 +size 192816 diff --git a/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_80_siz28.npz b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_80_siz28.npz new file mode 100644 index 0000000000000000000000000000000000000000..b44009ca75f0d63596408ad3d4c9d0c3f8558fee --- /dev/null +++ b/src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_80_siz28.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab772f2a1d79837722b5ed5f3a5835829def726ceb5d5095cfe87a0495a20876 +size 255696 diff --git a/src/sts/projects/SWINTS/LME/mask_evaluation.py b/src/sts/projects/SWINTS/LME/mask_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..0b73625f4136394fe60c162f9b06b1aa63631454 --- /dev/null +++ b/src/sts/projects/SWINTS/LME/mask_evaluation.py @@ -0,0 +1,101 @@ +# coding:utf-8 + +import os +import argparse +import numpy as np +from torch.utils.data import DataLoader + +from MaskLoader import MaskLoader +from utils 
import ( + IOUMetric, + transform, + inverse_transform, + direct_sigmoid, + inverse_sigmoid +) + + +VALUE_MAX = 0.05 +VALUE_MIN = 0.01 + + +def parse_args(): + parser = argparse.ArgumentParser(description='Evaluation for PCA Mask Encoding.') + parser.add_argument('--root', default='datasets', type=str) + parser.add_argument('--dataset', default='coco_2017_val', type=str) + parser.add_argument('--matrix', default='./projects/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_128.npz', type=str) + # mask encoding params. + parser.add_argument('--mask_size', default=28, type=int) + parser.add_argument('--n_components', default=128, type=int) + parser.add_argument('--class_agnostic', default=True, type=bool) + parser.add_argument('--whiten', default=True, type=bool) + parser.add_argument('--sigmoid', default=True, type=bool) + parser.add_argument('--batch-size', default=1024, type=int) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + # parse args. + mask_size = args.mask_size + n_components = args.n_components + class_agnostic = args.class_agnostic + whiten = args.whiten + sigmoid = args.sigmoid + + dataset_root = './datasets/' + matrix_path = args.matrix #os.path.join(dataset_root, ) + + # load matrix. + print("Loading matrix parameters: {}".format(matrix_path)) + parameters = np.load(matrix_path) + components_c = parameters['components_c'] + mean_c = parameters['mean_c'] + ratio_c = parameters['ratio_c'] + explained_variance_c = parameters['explained_variance_c'] + if class_agnostic: + components_c = np.squeeze(components_c) + mean_c = np.squeeze(mean_c) + explained_variance_c = np.squeeze(explained_variance_c) + assert n_components == components_c.shape[0], \ + print("The n_components in component_ must equal to the supposed shape.") + else: + # TODO: We have not achieve the function in class-specific. + raise NotImplementedError + + # build data loader. + mask_data = MaskLoader(root=dataset_root, dataset=args.dataset, size=mask_size) + mask_loader = DataLoader(mask_data, batch_size=args.batch_size, shuffle=False, num_workers=4) + size_data = len(mask_loader) + + # evaluation. + IoUevaluate = IOUMetric(2) + print("Start Eva ...") + for i, masks in enumerate(mask_loader): + print("Eva [{} / {}]".format(i, size_data)) + # generate the reconstruction mask. + masks = masks.view(masks.shape[0], -1).numpy() + masks = masks.astype(np.float32) + # pre-process. + if sigmoid: + value_random = VALUE_MAX * np.random.rand(masks.shape[0], masks.shape[1]) + value_random = np.maximum(value_random, VALUE_MIN) + masks_random = np.where(masks > value_random, 1 - value_random, value_random) + masks_random = inverse_sigmoid(masks_random) + else: + masks_random = masks + # --> encode --> decode. + mask_rc = transform(masks_random, components_=components_c, explained_variance_=explained_variance_c, + mean_=mean_c, whiten=whiten) + mask_rc = inverse_transform(mask_rc, components_=components_c, explained_variance_=explained_variance_c, + mean_=mean_c, whiten=whiten) + # post-process. + if sigmoid: + mask_rc = direct_sigmoid(mask_rc) + # eva. 
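+ # Binarize the reconstructed masks at 0.5 and accumulate them against the original
+ # masks in the IoU histogram; mIoU is reported once the loop finishes.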
+ mask_rc = np.where(mask_rc >= 0.5, 1, 0) + IoUevaluate.add_batch(mask_rc, masks) + + _, _, _, mean_iu, _ = IoUevaluate.evaluate() + print("The mIoU for {}: {}".format(args.matrix, mean_iu)) diff --git a/src/sts/projects/SWINTS/LME/mask_generation.py b/src/sts/projects/SWINTS/LME/mask_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..6bceba7ebf5cf1e7f7543c77f860b161f53cf96f --- /dev/null +++ b/src/sts/projects/SWINTS/LME/mask_generation.py @@ -0,0 +1,113 @@ +# coding:utf-8 + +import os +import argparse +import time +import numpy as np +import torch +from torch.utils.data import DataLoader +from sklearn.decomposition import IncrementalPCA + +from MaskLoader import MaskLoader +from utils import inverse_sigmoid + + +VALUE_MAX = 0.05 +VALUE_MIN = 0.01 + + +def mask_encoding(masks, n_components=60, class_agnostic=True, whiten=True, sigmoid=True, batch_size=1024): + components_c = [] + mean_c = [] + ratio_c = [] + explained_variance_c = [] + if class_agnostic: + if sigmoid: + value_random = VALUE_MAX * np.random.rand(masks.shape[0], masks.shape[1]) + value_random = np.maximum(value_random, VALUE_MIN) + masks = np.where(masks > value_random, 1-value_random, value_random) + masks = inverse_sigmoid(masks) + pca = IncrementalPCA(n_components=n_components, copy=False, whiten=whiten, batch_size=batch_size) + pca.fit(masks) + components_c.append(pca.components_[np.newaxis, :, :]) + mean_c.append(pca.mean_[np.newaxis, :]) + ratio_c.append(pca.explained_variance_ratio_[np.newaxis, :]) + explained_variance_c.append(pca.explained_variance_[np.newaxis, :]) + ratio = pca.explained_variance_ratio_.sum() + else: + # TODO: We have not achieve the function in class-specific. + raise NotImplemented + + return components_c, mean_c, ratio_c, explained_variance_c, ratio + + +def parse_args(): + parser = argparse.ArgumentParser(description='PCA Mask Encoding for local mask.') + parser.add_argument('--root', default='datasets', type=str) + parser.add_argument('--dataset', default='coco_2017_train', type=str) + parser.add_argument('--output', default='./projects/LME', type=str) + # mask encoding params. + parser.add_argument('--mask_size', default=28, type=int) + parser.add_argument('--n_components', default=128, type=int) + parser.add_argument('--class_agnostic', default=True, type=bool) + parser.add_argument('--whiten', default=True, type=bool) + parser.add_argument('--sigmoid', default=True, type=bool) + parser.add_argument('--batch-size', default=1024, type=int) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + # parse args. + mask_size = args.mask_size + n_components = args.n_components + class_agnostic = args.class_agnostic + whiten = args.whiten + sigmoid = args.sigmoid + + dataset_root = './datasets/' + output_dir = args.output + os.makedirs(output_dir, exist_ok=True) + + # build data loader. + mask_data = MaskLoader(root=dataset_root, dataset=args.dataset, size=mask_size) + mask_loader = DataLoader(mask_data, batch_size=args.batch_size, shuffle=False, num_workers=4) + + # loading masks. + masks = list() + print("Start Loading Masks.") + tic = time.time() + for mask in mask_loader: + masks.append(mask.squeeze(1)) + toc = time.time() - tic + print("Finish Loading Masks in {}s.".format(toc)) + masks = torch.cat(masks, 0) + masks = masks.view(masks.shape[0], -1).numpy() + masks = masks.astype(np.float32) + + # mask encoding. 
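+ # mask_encoding fits an IncrementalPCA on the flattened (N, mask_size**2) mask matrix
+ # (optionally in inverse-sigmoid space, with whitening) and returns the components, mean
+ # and explained-variance statistics that are averaged and saved to the .npz file below.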
+ print("Start to mask encoding ...") + print("It may take several times, please wait patiently ...") + tic = time.time() + components_c, mean_c, ratio_c, explained_variance_c, ratio = \ + mask_encoding(masks, n_components, class_agnostic, whiten, sigmoid, args.batch_size) + toc = time.time() - tic + print("Finish the mask encoding in {}s.".format(toc)) + + components_c = np.concatenate(components_c).mean(0)[np.newaxis, :, :].astype(np.float32) + mean_c = np.concatenate(mean_c).mean(0)[np.newaxis, :].astype(np.float32) + ratio_c = np.concatenate(ratio_c).mean(0)[np.newaxis, :].astype(np.float32) + explained_variance_c = np.concatenate(explained_variance_c).mean(0)[np.newaxis, :].astype(np.float32) + print("The mean variance_ratio for all categories is {}".format(np.mean(ratio))) + + # save the parameters. + output_path = os.path.join(output_dir, args.dataset + '_class_agnostic' + str(class_agnostic) + + '_whiten' + str(whiten) + '_sigmoid' + str(sigmoid) + '_' + str(n_components) + '_siz' + str(mask_size) + + '.npz') + print("Save the local mask encoding matrix: " + output_path) + np.savez(output_path, + components_c=components_c, + mean_c=mean_c, + ratio_c=ratio_c, + explained_variance_c=explained_variance_c) diff --git a/src/sts/projects/SWINTS/LME/utils.py b/src/sts/projects/SWINTS/LME/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7d93f1a7e69ba7effcf113af2b303dd8f8a6088a --- /dev/null +++ b/src/sts/projects/SWINTS/LME/utils.py @@ -0,0 +1,120 @@ +# coding:utf-8 + +import numpy as np + + +def direct_sigmoid(x): + """Apply the sigmoid operation. + """ + y = 1./(1.+1./np.exp(x)) + dy = y*(1-y) + return y + + +def inverse_sigmoid(x): + """Apply the inverse sigmoid operation. + y = -ln(1-x/x) + """ + y = -1 * np.log((1-x)/x) + return y + + +def transform(X, components_, explained_variance_, mean_=None, whiten=False): + """Apply dimensionality reduction to X. + X is projected on the first principal components previously extracted + from a training set. + Parameters + ---------- + X: array-like, shape (n_samples, n_features) + New data, where n_samples is the number of samples + and n_features is the number of features. + components_: array-like, shape (n_components, n_features) + mean_: array-like, shape (n_features,) + explained_variance_: array-like, shape (n_components,) + Variance explained by each of the selected components. + whiten : bool, optional + When True (False by default) the ``components_`` vectors are divided + by ``n_samples`` times ``components_`` to ensure uncorrelated outputs + with unit component-wise variances. + Whitening will remove some information from the transformed signal + (the relative variance scales of the components) but can sometimes + improve the predictive accuracy of the downstream estimators by + making data respect some hard-wired assumptions. + Returns + ------- + X_new : array-like, shape (n_samples, n_components) + """ + + if mean_ is not None: + X = X - mean_ + X_transformed = np.dot(X, components_.T) + if whiten: + X_transformed /= np.sqrt(explained_variance_) + return X_transformed + + +def inverse_transform(X, components_, explained_variance_, mean_=None, whiten=False): + """Transform data back to its original space. + In other words, return an input X_original whose transform would be X. + Parameters + ---------- + X : array-like, shape (n_samples, n_components) + New data, where n_samples is the number of samples + and n_components is the number of components. 
+ components_: array-like, shape (n_components, n_features) + mean_: array-like, shape (n_features,) + explained_variance_: array-like, shape (n_components,) + Variance explained by each of the selected components. + whiten : bool, optional + When True (False by default) the ``components_`` vectors are divided + by ``n_samples`` times ``components_`` to ensure uncorrelated outputs + with unit component-wise variances. + Whitening will remove some information from the transformed signal + (the relative variance scales of the components) but can sometimes + improve the predictive accuracy of the downstream estimators by + making data respect some hard-wired assumptions. + + Returns + ------- + X_original array-like, shape (n_samples, n_features) + """ + if whiten: + X_transformed = np.dot(X, np.sqrt(explained_variance_[:, np.newaxis]) * components_) + else: + X_transformed = np.dot(X, components_) + + if mean_ is not None: + X_transformed = X_transformed + mean_ + + return X_transformed + + +class IOUMetric(object): + """ + Class to calculate mean-iou using fast_hist method + """ + + def __init__(self, num_classes): + self.num_classes = num_classes + self.hist = np.zeros((num_classes, num_classes)) + + def _fast_hist(self, label_pred, label_true): + mask = (label_true >= 0) & (label_true < self.num_classes) + hist = np.bincount( + self.num_classes * label_true[mask].astype(int) + + label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes) + return hist + + def add_batch(self, predictions, gts): + for lp, lt in zip(predictions, gts): + self.hist += self._fast_hist(lp.flatten(), lt.flatten()) + + def evaluate(self): + acc = np.diag(self.hist).sum() / self.hist.sum() + acc_cls = np.diag(self.hist) / self.hist.sum(axis=1) + acc_cls = np.nanmean(acc_cls) + iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) + mean_iu = np.nanmean(iu) + freq = self.hist.sum(axis=1) / self.hist.sum() + fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() + return acc, acc_cls, iu, mean_iu, fwavacc \ No newline at end of file diff --git a/src/sts/projects/SWINTS/configs/Base-SWINTS_r50.yaml b/src/sts/projects/SWINTS/configs/Base-SWINTS_r50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4938aa10c5fe4f78fc7d056fed05bf6d11d0ad9a --- /dev/null +++ b/src/sts/projects/SWINTS/configs/Base-SWINTS_r50.yaml @@ -0,0 +1,50 @@ +MODEL: + META_ARCHITECTURE: "SWINTS" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ROI_HEADS: + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + POOLER_TYPE: "ROIAlignV2" + POOLER_RESOLUTION: 7 + POOLER_SAMPLING_RATIO: 2 + MASK_ON: True + REC_HEAD: + BATCH_SIZE: 48 +SOLVER: + IMS_PER_BATCH: 1 + BASE_LR: 0.000025 + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 0 + WEIGHT_DECAY: 0.0001 + OPTIMIZER: "ADAMW" + BACKBONE_MULTIPLIER: 1.0 # keep same with BASE_LR. 
+ CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 +SEED: 40244023 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 832, 864, 896) + MAX_SIZE_TRAIN: 1600 + CROP: + ENABLED: True + #TYPE: "relative" + CROP_INSTANCE: False + SIZE: (0.1, 0.1) + FORMAT: "RGB" + MIN_SIZE_TEST: 1000 + MAX_SIZE_TEST: 1824 +TEST: + EVAL_PERIOD: 2000 +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 4 +VERSION: 2 diff --git a/src/sts/projects/SWINTS/configs/Base-SWINTS_swin.yaml b/src/sts/projects/SWINTS/configs/Base-SWINTS_swin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1544c8536b3c052a973e45f961dbb060553b10e --- /dev/null +++ b/src/sts/projects/SWINTS/configs/Base-SWINTS_swin.yaml @@ -0,0 +1,49 @@ +MODEL: + META_ARCHITECTURE: "SWINTS" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + BACKBONE: + NAME: "build_swint_fpn_backbone" + SWINT: + OUT_FEATURES: ["stage2", "stage3", "stage4", "stage5"] + FPN: + IN_FEATURES: ["stage2", "stage3", "stage4", "stage5"] + ROI_HEADS: + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + POOLER_TYPE: "ROIAlignV2" + POOLER_RESOLUTION: 7 + POOLER_SAMPLING_RATIO: 2 + MASK_ON: True + REC_HEAD: + BATCH_SIZE: 4 +SOLVER: + IMS_PER_BATCH: 2 + BASE_LR: 0.000025 + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 1000 + WEIGHT_DECAY: 0.0001 + OPTIMIZER: "ADAMW" + BACKBONE_MULTIPLIER: 1.0 # keep same with BASE_LR. + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 +SEED: 40244023 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 832, 864, 896) + MAX_SIZE_TRAIN: 1600 + CROP: + ENABLED: True + CROP_INSTANCE: False + SIZE: (0.1, 0.1) + FORMAT: "RGB" + MIN_SIZE_TEST: 1000 + MAX_SIZE_TEST: 1824 +TEST: + EVAL_PERIOD: 4000 +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 4 +VERSION: 2 diff --git a/src/sts/projects/SWINTS/configs/SWINTS-R50-finetune.yaml b/src/sts/projects/SWINTS/configs/SWINTS-R50-finetune.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7b1650f6ba1d867443446164ed1cc34b504862f --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-R50-finetune.yaml @@ -0,0 +1,17 @@ +_BASE_: "Base-SWINTS_r50.yaml" +MODEL: + WEIGHTS: "./output/mixtrain/model_0089999.pth" + RESNETS: + DEPTH: 50 + STRIDE_IN_1X1: False + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + TRAIN: ("totaltext_train",) + TEST: ("totaltext_test",) +SOLVER: + STEPS: (6000,) + MAX_ITER: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-R50-mixtrain.yaml b/src/sts/projects/SWINTS/configs/SWINTS-R50-mixtrain.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c0a96a1dcb54435d219d68eec917da3129c1f63 --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-R50-mixtrain.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-SWINTS_r50.yaml" +MODEL: + WEIGHTS: "./output/prain/model_0449999.pth" + RESNETS: + DEPTH: 50 + STRIDE_IN_1X1: False + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + TRAIN: ("totaltext_train","icdar_2013_train","icdar_2017_validation_mlt","icdar_2017_mlt",) + TEST: ("totaltext_tset",) + # TEST: ("coco_2017_test-dev",) +SOLVER: + STEPS: (60000,) + MAX_ITER: 80000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-R50-pretrain.yaml b/src/sts/projects/SWINTS/configs/SWINTS-R50-pretrain.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a08eeace0442f6c892d341e21fa1424bec608fa --- /dev/null +++ 
b/src/sts/projects/SWINTS/configs/SWINTS-R50-pretrain.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-SWINTS_r50.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + RESNETS: + DEPTH: 50 + STRIDE_IN_1X1: False + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + TRAIN: ("totaltext_train","icdar_2015_train","icdar_2013_train","icdar_2017_validation_mlt","icdar_2017_mlt","icdar_curvesynthtext_train1","icdar_curvesynthtext_train2",) + TEST: ("totaltext_test",) +SOLVER: + STEPS: (360000,420000) + MAX_ITER: 450000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-swin-chn_finetune.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-chn_finetune.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9a4497b8ef4f34c6d7c58f5f98fc481a1b295f3 --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-chn_finetune.yaml @@ -0,0 +1,20 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 + REC_HEAD: + POOLER_RESOLUTION: (16,40) + RESOLUTION: (32, 80) + BATCH_SIZE: 128 + NUM_CLASSES: 5463 +DATASETS: + TRAIN: ("rects",) + TEST: ("totaltext_test",) +SOLVER: + STEPS: (140000,160000) + MAX_ITER: 180000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-swin-chn_pretrain.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-chn_pretrain.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37a19cb024ef7d51d58a9a4d98edba4b5bed447a --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-chn_pretrain.yaml @@ -0,0 +1,20 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: "swin_imagenet_pretrain.pth" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 + REC_HEAD: + POOLER_RESOLUTION: (16,48) + RESOLUTION: (32, 80) + BATCH_SIZE: 128 + NUM_CLASSES: 5463 +DATASETS: + TRAIN: ("rects","art","lsvt","chn_syn",) + TEST: ("totaltext_test",) +SOLVER: + STEPS: (160000,220000) + MAX_ITER: 260000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-ctw.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-ctw.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3640cdd15cab5a0ec956f5442112ff4b3a3a8c67 --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-ctw.yaml @@ -0,0 +1,19 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: ".output/pretrain/model_0449999.pth" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 + REC_HEAD: + POOLER_RESOLUTION: (16,48) + RESOLUTION: (32, 96) + BATCH_SIZE: 128 +DATASETS: + TRAIN: ("ctw1500_train",) + TEST: ("ctw1500_test",) +SOLVER: + STEPS: (30000,) + MAX_ITER: 50000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-ic15.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-ic15.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65c20e264150284324a77f0f743f5f0a999dc5de --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-ic15.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: ".output/mixtrain/model_0079999.pth" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + TRAIN: ("icdar2015_train",) + TEST: ("icdar2015_test",) +SOLVER: + STEPS: (6000,) + MAX_ITER: 10000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" + +TEST: + INFERENCE_TH_TEST: 0.05 diff --git 
a/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-totaltext.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-totaltext.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df71e4a39466b69725e21ee8695557038f9e3d40 --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-totaltext.yaml @@ -0,0 +1,15 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: ".output/mixtrain/model_0079999.pth" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + TRAIN: ("totaltext_train",) + TEST: ("totaltext_test",) +SOLVER: + STEPS: (6000,) + MAX_ITER: 10000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-vintext.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-vintext.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9aa520ea96dcec1c65223e6c095d126e47561ea6 --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-finetune-vintext.yaml @@ -0,0 +1,17 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: "vintext_model_final.pth" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + TRAIN: ("vintext_train",) + TEST: ("vintext_test",) +SOLVER: + STEPS: (6000,) + MAX_ITER: 50000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" +TEST: + INFERENCE_TH_TEST: 0.5 \ No newline at end of file diff --git a/src/sts/projects/SWINTS/configs/SWINTS-swin-mixtrain.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-mixtrain.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd33886f5f4298d9aaf6cb563a845270bb413cb3 --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-mixtrain.yaml @@ -0,0 +1,17 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: ".output/pretrain/model_0449999.pth" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + # TRAIN: ("totaltext_train",) + TRAIN: ("totaltext_train","icdar_2015_train","icdar_2013_train","icdar_2017_validation_mlt","icdar_2017_mlt",) + TEST: ("totaltext_test",) + # TEST: ("coco_2017_test-dev",) +SOLVER: + STEPS: (60000,) + MAX_ITER: 80000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/configs/SWINTS-swin-pretrain.yaml b/src/sts/projects/SWINTS/configs/SWINTS-swin-pretrain.yaml new file mode 100644 index 0000000000000000000000000000000000000000..286c32f15ee23ff2b6eb9fdd9f9a710b0ad83e20 --- /dev/null +++ b/src/sts/projects/SWINTS/configs/SWINTS-swin-pretrain.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-SWINTS_swin.yaml" +MODEL: + WEIGHTS: "swin_imagenet_pretrain.pth" + SWINTS: + NUM_PROPOSALS: 300 + NUM_CLASSES: 2 +DATASETS: + # TRAIN: ("totaltext_train",) + TRAIN: ("totaltext_train","icdar_2015_train","icdar_2013_train","icdar_2017_validation_mlt","icdar_2017_mlt","icdar_curvesynthtext_train1","icdar_curvesynthtext_train2",) + TEST: ("totaltext_test",) +SOLVER: + STEPS: (360000,420000) + MAX_ITER: 450000 + CHECKPOINT_PERIOD: 10000 +INPUT: + FORMAT: "RGB" diff --git a/src/sts/projects/SWINTS/swints/FocalTransformer.py b/src/sts/projects/SWINTS/swints/FocalTransformer.py new file mode 100644 index 0000000000000000000000000000000000000000..1f2e8313f1b590e3bed462ded088865c41010a00 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/FocalTransformer.py @@ -0,0 +1,680 @@ +import torch +from torch import nn, Tensor +import torch.nn.functional as F +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +import math + + +class FocalTransformerBlock(nn.Module): + r""" Focal Transformer Block. 
+ Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. + num_heads (int): Number of attention heads. + window_size (int): Window size. + expand_size (int): expand size at first focal level (finest level). + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + pool_method (str): window pooling method. Default: none, options: [none|fc|conv] + focal_level (int): number of focal levels. Default: 1. + focal_window (int): region size of focal attention. Default: 1 + use_layerscale (bool): whether use layer scale for training stability. Default: False + layerscale_value (float): scaling value for layer scale. Default: 1e-4 + """ + + def __init__(self, dim, input_resolution, num_heads, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="none", + focal_level=1, focal_window=1, use_layerscale=False, layerscale_value=1e-4): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.expand_size = expand_size + self.mlp_ratio = mlp_ratio + self.pool_method = pool_method + self.focal_level = focal_level + self.focal_window = focal_window + self.use_layerscale = use_layerscale + + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.expand_size = 0 + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.window_size_glo = self.window_size + + self.pool_layers = nn.ModuleList() + if self.pool_method != "none": + for k in range(self.focal_level-1): + window_size_glo = math.floor(self.window_size_glo / (2 ** k)) + if self.pool_method == "fc": + self.pool_layers.append(nn.Linear(window_size_glo * window_size_glo, 1)) + self.pool_layers[-1].weight.data.fill_(1./(window_size_glo * window_size_glo)) + self.pool_layers[-1].bias.data.fill_(0) + elif self.pool_method == "conv": + self.pool_layers.append(nn.Conv2d(dim, dim, kernel_size=window_size_glo, stride=window_size_glo, groups=dim)) + + self.norm1 = norm_layer(dim) + + self.attn = WindowAttention( + dim, expand_size=self.expand_size, window_size=(self.window_size,self.window_size), + focal_window=focal_window, focal_level=focal_level, num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop, pool_method=pool_method) + + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if self.shift_size > 0: + # calculate attention mask for SW-MSA + H, W = self.input_resolution + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + else: + attn_mask = None + self.register_buffer("attn_mask", attn_mask) + + if self.use_layerscale: + self.gamma_1 = nn.Parameter(layerscale_value * torch.ones((dim)), requires_grad=True) + self.gamma_2 = nn.Parameter(layerscale_value * torch.ones((dim)), requires_grad=True) + + def forward(self, x): + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + if pad_r > 0 or pad_b > 0: + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + + B, H, W, C = x.shape + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_x = x + + x_windows_all = [shifted_x] + x_window_masks_all = [self.attn_mask] + + if self.focal_level > 1 and self.pool_method != "none": + # if we add coarser granularity and the pool method is not none + for k in range(self.focal_level-1): + window_size_glo = math.floor(self.window_size_glo / (2 ** k)) + pooled_h = math.ceil(H / self.window_size) * (2 ** k) + pooled_w = math.ceil(W / self.window_size) * (2 ** k) + H_pool = pooled_h * window_size_glo + W_pool = pooled_w * window_size_glo + + x_level_k = shifted_x + # trim or pad shifted_x depending on the required size + if H > H_pool: + trim_t = (H - H_pool) // 2 + trim_b = H - H_pool - trim_t + x_level_k = x_level_k[:, trim_t:-trim_b] + elif H < H_pool: + pad_t = (H_pool - H) // 2 + pad_b = H_pool - H - pad_t + x_level_k = F.pad(x_level_k, (0,0,0,0,pad_t,pad_b)) + + if W > W_pool: + trim_l = (W - W_pool) // 2 + trim_r = W - W_pool - trim_l + x_level_k = x_level_k[:, :, trim_l:-trim_r] + elif W < W_pool: + pad_l = (W_pool - W) // 2 + pad_r = W_pool - W - pad_l + x_level_k = F.pad(x_level_k, (0,0,pad_l,pad_r)) + + x_windows_noreshape = window_partition_noreshape(x_level_k.contiguous(), window_size_glo) # B, nw, nw, window_size, window_size, C + nWh, nWw = x_windows_noreshape.shape[1:3] + if self.pool_method == "mean": + x_windows_pooled = x_windows_noreshape.mean([3, 4]) # B, nWh, nWw, C + elif self.pool_method == "max": + x_windows_pooled = x_windows_noreshape.max(-2)[0].max(-2)[0].view(B, nWh, nWw, C) # B, nWh, nWw, C + elif self.pool_method == "fc": + x_windows_noreshape = x_windows_noreshape.view(B, nWh, nWw, 
window_size_glo*window_size_glo, C).transpose(3, 4) # B, nWh, nWw, C, wsize**2 + x_windows_pooled = self.pool_layers[k](x_windows_noreshape).flatten(-2) # B, nWh, nWw, C + elif self.pool_method == "conv": + x_windows_noreshape = x_windows_noreshape.view(-1, window_size_glo, window_size_glo, C).permute(0, 3, 1, 2).contiguous() # B * nw * nw, C, wsize, wsize + x_windows_pooled = self.pool_layers[k](x_windows_noreshape).view(B, nWh, nWw, C) # B, nWh, nWw, C + + x_windows_all += [x_windows_pooled] + x_window_masks_all += [None] + + attn_windows = self.attn(x_windows_all, mask_all=x_window_masks_all) # nW*B, window_size*window_size, C + + attn_windows = attn_windows[:, :self.window_size ** 2] + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + x = x[:, :self.input_resolution[0], :self.input_resolution[1]].contiguous().view(B, -1, C) + + # FFN + x = shortcut + self.drop_path(x if (not self.use_layerscale) else (self.gamma_1 * x)) + x = x + self.drop_path(self.mlp(self.norm2(x)) if (not self.use_layerscale) else (self.gamma_2 * self.mlp(self.norm2(x)))) + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size, self.window_size, self.focal_window) + + if self.pool_method != "none" and self.focal_level > 1: + for k in range(self.focal_level-1): + window_size_glo = math.floor(self.window_size_glo / (2 ** k)) + nW_glo = nW * (2**k) + # (sub)-window pooling + flops += nW_glo * self.dim * window_size_glo * window_size_glo + # qkv for global levels + # NOTE: in our implementation, we pass the pooled window embedding to qkv embedding layer, + # but theoritically, we only need to compute k and v. 
+ flops += nW_glo * self.dim * 3 * self.dim + + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + +def window_partition_noreshape(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (B, num_windows_h, num_windows_w, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous() + return windows + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + +def get_roll_masks(H, W, window_size, shift_size): + ##################################### + # move to top-left + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, H-window_size), + slice(H-window_size, H-shift_size), + slice(H-shift_size, H)) + w_slices = (slice(0, W-window_size), + slice(W-window_size, W-shift_size), + slice(W-shift_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_tl = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + #################################### + # move to top right + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, H-window_size), + slice(H-window_size, H-shift_size), + slice(H-shift_size, H)) + w_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_tr = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + #################################### + # move to bottom left 
+ img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, H)) + w_slices = (slice(0, W-window_size), + slice(W-window_size, W-shift_size), + slice(W-shift_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_bl = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + #################################### + # move to bottom right + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, H)) + w_slices = (slice(0, shift_size), + slice(shift_size, window_size), + slice(window_size, W)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, window_size * window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask_br = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + # append all + attn_mask_all = torch.cat((attn_mask_tl, attn_mask_tr, attn_mask_bl, attn_mask_br), -1) + return attn_mask_all + +def get_relative_position_index(q_windows, k_windows): + """ + Args: + q_windows: tuple (query_window_height, query_window_width) + k_windows: tuple (key_window_height, key_window_width) + Returns: + relative_position_index: query_window_height*query_window_width, key_window_height*key_window_width + """ + # get pair-wise relative position index for each token inside the window + coords_h_q = torch.arange(q_windows[0]) + coords_w_q = torch.arange(q_windows[1]) + coords_q = torch.stack(torch.meshgrid([coords_h_q, coords_w_q])) # 2, Wh_q, Ww_q + + coords_h_k = torch.arange(k_windows[0]) + coords_w_k = torch.arange(k_windows[1]) + coords_k = torch.stack(torch.meshgrid([coords_h_k, coords_w_k])) # 2, Wh, Ww + + coords_flatten_q = torch.flatten(coords_q, 1) # 2, Wh_q*Ww_q + coords_flatten_k = torch.flatten(coords_k, 1) # 2, Wh_k*Ww_k + + relative_coords = coords_flatten_q[:, :, None] - coords_flatten_k[:, None, :] # 2, Wh_q*Ww_q, Wh_k*Ww_k + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh_q*Ww_q, Wh_k*Ww_k, 2 + relative_coords[:, :, 0] += k_windows[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += k_windows[1] - 1 + relative_coords[:, :, 0] *= (q_windows[1] + k_windows[1]) - 1 + relative_position_index = relative_coords.sum(-1) # Wh_q*Ww_q, Wh_k*Ww_k + return relative_position_index + +class WindowAttention(nn.Module): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + Args: + dim (int): Number of input channels. + expand_size (int): The expand size at focal level 1. + window_size (tuple[int]): The height and width of the window. + focal_window (int): Focal region size. + focal_level (int): Focal attention level. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. 
Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + pool_method (str): window pooling method. Default: none + """ + + def __init__(self, dim, expand_size, window_size, focal_window, focal_level, num_heads, + qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0., pool_method="none"): + + super().__init__() + self.dim = dim + self.expand_size = expand_size + self.window_size = window_size # Wh, Ww + self.pool_method = pool_method + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + self.focal_level = focal_level + self.focal_window = focal_window + + # define a parameter table of relative position bias for each window + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + if self.expand_size > 0 and focal_level > 0: + # define a parameter table of position bias between window and its fine-grained surroundings + self.window_size_of_key = self.window_size[0] * self.window_size[1] if self.expand_size == 0 else \ + (4 * self.window_size[0] * self.window_size[1] - 4 * (self.window_size[0] - self.expand_size) * (self.window_size[0] - self.expand_size)) + self.relative_position_bias_table_to_neighbors = nn.Parameter( + torch.zeros(1, num_heads, self.window_size[0] * self.window_size[1], self.window_size_of_key)) # Wh*Ww, nH, nSurrounding + trunc_normal_(self.relative_position_bias_table_to_neighbors, std=.02) + + # get mask for rolled k and rolled v + mask_tl = torch.ones(self.window_size[0], self.window_size[1]); mask_tl[:-self.expand_size, :-self.expand_size] = 0 + mask_tr = torch.ones(self.window_size[0], self.window_size[1]); mask_tr[:-self.expand_size, self.expand_size:] = 0 + mask_bl = torch.ones(self.window_size[0], self.window_size[1]); mask_bl[self.expand_size:, :-self.expand_size] = 0 + mask_br = torch.ones(self.window_size[0], self.window_size[1]); mask_br[self.expand_size:, self.expand_size:] = 0 + mask_rolled = torch.stack((mask_tl, mask_tr, mask_bl, mask_br), 0).flatten(0) + self.register_buffer("valid_ind_rolled", mask_rolled.nonzero().view(-1)) + + if pool_method != "none" and focal_level > 1: + self.relative_position_bias_table_to_windows = nn.ParameterList() + self.unfolds = nn.ModuleList() + + # build relative position bias between local patch and pooled windows + for k in range(focal_level-1): + stride = 2**k + kernel_size = 2*(self.focal_window // 2) + 2**k + (2**k-1) + # define unfolding operations + self.unfolds += [nn.Unfold( + kernel_size=(kernel_size, kernel_size), + stride=stride, padding=kernel_size // 2) + ] + + # define relative position bias table + relative_position_bias_table_to_windows 
= nn.Parameter( + torch.zeros( + self.num_heads, + (self.window_size[0] + self.focal_window + 2**k - 2) * (self.window_size[1] + self.focal_window + 2**k - 2), + ) + ) + trunc_normal_(relative_position_bias_table_to_windows, std=.02) + self.relative_position_bias_table_to_windows.append(relative_position_bias_table_to_windows) + + # define relative position bias index + relative_position_index_k = get_relative_position_index(self.window_size, to_2tuple(self.focal_window + 2**k - 1)) + self.register_buffer("relative_position_index_{}".format(k), relative_position_index_k) + + # define unfolding index for focal_level > 0 + if k > 0: + mask = torch.zeros(kernel_size, kernel_size); mask[(2**k)-1:, (2**k)-1:] = 1 + self.register_buffer("valid_ind_unfold_{}".format(k), mask.flatten(0).nonzero().view(-1)) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x_all, mask_all=None): + """ + Args: + x_all (list[Tensors]): input features at different granularity + mask_all (list[Tensors/None]): masks for input features at different granularity + """ + x = x_all[0] # + B, nH, nW, C = x.shape + qkv = self.qkv(x).reshape(B, nH, nW, 3, C).permute(3, 0, 1, 2, 4).contiguous() + q, k, v = qkv[0], qkv[1], qkv[2] # B, nH, nW, C + + # partition q map + (q_windows, k_windows, v_windows) = map( + lambda t: window_partition(t, self.window_size[0]).view( + -1, self.window_size[0] * self.window_size[0], self.num_heads, C // self.num_heads + ).transpose(1, 2), + (q, k, v) + ) + + if self.expand_size > 0 and self.focal_level > 0: + (k_tl, v_tl) = map( + lambda t: torch.roll(t, shifts=(-self.expand_size, -self.expand_size), dims=(1, 2)), (k, v) + ) + (k_tr, v_tr) = map( + lambda t: torch.roll(t, shifts=(-self.expand_size, self.expand_size), dims=(1, 2)), (k, v) + ) + (k_bl, v_bl) = map( + lambda t: torch.roll(t, shifts=(self.expand_size, -self.expand_size), dims=(1, 2)), (k, v) + ) + (k_br, v_br) = map( + lambda t: torch.roll(t, shifts=(self.expand_size, self.expand_size), dims=(1, 2)), (k, v) + ) + + (k_tl_windows, k_tr_windows, k_bl_windows, k_br_windows) = map( + lambda t: window_partition(t, self.window_size[0]).view(-1, self.window_size[0] * self.window_size[0], self.num_heads, C // self.num_heads), + (k_tl, k_tr, k_bl, k_br) + ) + (v_tl_windows, v_tr_windows, v_bl_windows, v_br_windows) = map( + lambda t: window_partition(t, self.window_size[0]).view(-1, self.window_size[0] * self.window_size[0], self.num_heads, C // self.num_heads), + (v_tl, v_tr, v_bl, v_br) + ) + k_rolled = torch.cat((k_tl_windows, k_tr_windows, k_bl_windows, k_br_windows), 1).transpose(1, 2) + v_rolled = torch.cat((v_tl_windows, v_tr_windows, v_bl_windows, v_br_windows), 1).transpose(1, 2) + + # mask out tokens in current window + k_rolled = k_rolled[:, :, self.valid_ind_rolled] + v_rolled = v_rolled[:, :, self.valid_ind_rolled] + k_rolled = torch.cat((k_windows, k_rolled), 2) + v_rolled = torch.cat((v_windows, v_rolled), 2) + else: + k_rolled = k_windows; v_rolled = v_windows; + if self.pool_method != "none" and self.focal_level > 1: + k_pooled = [] + v_pooled = [] + for k in range(self.focal_level-1): + stride = 2**k + x_window_pooled = x_all[k+1] # B, nWh, nWw, C + nWh, nWw = x_window_pooled.shape[1:3] + + # generate mask for pooled windows + mask = x_window_pooled.new(nWh, nWw).fill_(1) + unfolded_mask = self.unfolds[k](mask.unsqueeze(0).unsqueeze(1)).view( + 1, 1, 
self.unfolds[k].kernel_size[0], self.unfolds[k].kernel_size[1], -1).permute(0, 4, 2, 3, 1).contiguous().\ + view(nWh*nWw // stride // stride, -1, 1) + + if k > 0: + valid_ind_unfold_k = getattr(self, "valid_ind_unfold_{}".format(k)) + unfolded_mask = unfolded_mask[:, valid_ind_unfold_k] + + x_window_masks = unfolded_mask.flatten(1).unsqueeze(0) + x_window_masks = x_window_masks.masked_fill(x_window_masks == 0, float(-100.0)).masked_fill(x_window_masks > 0, float(0.0)) + mask_all[k+1] = x_window_masks + + # generate k and v for pooled windows + qkv_pooled = self.qkv(x_window_pooled).reshape(B, nWh, nWw, 3, C).permute(3, 0, 4, 1, 2).contiguous() + k_pooled_k, v_pooled_k = qkv_pooled[1], qkv_pooled[2] # B, C, nWh, nWw + + + (k_pooled_k, v_pooled_k) = map( + lambda t: self.unfolds[k](t).view( + B, C, self.unfolds[k].kernel_size[0], self.unfolds[k].kernel_size[1], -1).permute(0, 4, 2, 3, 1).contiguous().\ + view(-1, self.unfolds[k].kernel_size[0]*self.unfolds[k].kernel_size[1], self.num_heads, C // self.num_heads).transpose(1, 2), + (k_pooled_k, v_pooled_k) # (B x (nH*nW)) x nHeads x (unfold_wsize x unfold_wsize) x head_dim + ) + + if k > 0: + (k_pooled_k, v_pooled_k) = map( + lambda t: t[:, :, valid_ind_unfold_k], (k_pooled_k, v_pooled_k) + ) + + k_pooled += [k_pooled_k] + v_pooled += [v_pooled_k] + k_all = torch.cat([k_rolled] + k_pooled, 2) + v_all = torch.cat([v_rolled] + v_pooled, 2) + else: + k_all = k_rolled + v_all = v_rolled + + N = k_all.shape[-2] + q_windows = q_windows * self.scale + attn = (q_windows @ k_all.transpose(-2, -1)) # B*nW, nHead, window_size*window_size, focal_window_size*focal_window_size + + window_area = self.window_size[0] * self.window_size[1] + window_area_rolled = k_rolled.shape[2] + + # add relative position bias for tokens inside window + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn[:, :, :window_area, :window_area] = attn[:, :, :window_area, :window_area] + relative_position_bias.unsqueeze(0) + + # add relative position bias for patches inside a window + if self.expand_size > 0 and self.focal_level > 0: + attn[:, :, :window_area, window_area:window_area_rolled] = attn[:, :, :window_area, window_area:window_area_rolled] + self.relative_position_bias_table_to_neighbors + + if self.pool_method != "none" and self.focal_level > 1: + # add relative position bias for different windows in an image + offset = window_area_rolled + for k in range(self.focal_level-1): + # add relative position bias + relative_position_index_k = getattr(self, 'relative_position_index_{}'.format(k)) + relative_position_bias_to_windows = self.relative_position_bias_table_to_windows[k][:, relative_position_index_k.view(-1)].view( + -1, self.window_size[0] * self.window_size[1], (self.focal_window+2**k-1)**2, + ) # nH, NWh*NWw,focal_region*focal_region + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] = \ + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] + relative_position_bias_to_windows.unsqueeze(0) + # add attentional mask + if mask_all[k+1] is not None: + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] = \ + attn[:, :, :window_area, offset:(offset + (self.focal_window+2**k-1)**2)] + \ + mask_all[k+1][:, :, None, None, :].repeat(attn.shape[0] 
// mask_all[k+1].shape[1], 1, 1, 1, 1).view(-1, 1, 1, mask_all[k+1].shape[-1]) + + offset += (self.focal_window+2**k-1)**2 + + if mask_all[0] is not None: + nW = mask_all[0].shape[0] + attn = attn.view(attn.shape[0] // nW, nW, self.num_heads, window_area, N) + attn[:, :, :, :, :window_area] = attn[:, :, :, :, :window_area] + mask_all[0][None, :, None, :, :] + attn = attn.view(-1, self.num_heads, window_area, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v_all).transpose(1, 2).reshape(attn.shape[0], window_area, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' + + def flops(self, N, window_size, unfold_size): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + if self.pool_method != "none" and self.focal_level > 1: + flops += self.num_heads * N * (self.dim // self.num_heads) * (unfold_size * unfold_size) + if self.expand_size > 0 and self.focal_level > 0: + flops += self.num_heads * N * (self.dim // self.num_heads) * ((window_size + 2*self.expand_size)**2-window_size**2) + + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + if self.pool_method != "none" and self.focal_level > 1: + flops += self.num_heads * N * (self.dim // self.num_heads) * (unfold_size * unfold_size) + if self.expand_size > 0 and self.focal_level > 0: + flops += self.num_heads * N * (self.dim // self.num_heads) * ((window_size + 2*self.expand_size)**2-window_size**2) + + # x = self.proj(x) + flops += N * self.dim * self.dim + return diff --git a/src/sts/projects/SWINTS/swints/MaskEncoding.py b/src/sts/projects/SWINTS/swints/MaskEncoding.py new file mode 100644 index 0000000000000000000000000000000000000000..412602709dcedf9014fbd271dfd191230f7a0fa0 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/MaskEncoding.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn + +VALUE_MAX = 0.05 +VALUE_MIN = 0.01 + + +@torch.no_grad() +class PCAMaskEncoding(nn.Module): + """ + To do the mask encoding of PCA. + components_: (tensor), shape (n_components, n_features) if agnostic=True + else (n_samples, n_components, n_features) + explained_variance_: Variance explained by each of the selected components. + (tensor), shape (n_components) if agnostic=True + else (n_samples, n_components) + mean_: (tensor), shape (n_features) if agnostic=True + else (n_samples, n_features) + agnostic: (bool), whether class_agnostic or class_specific. + whiten : (bool), optional + When True (False by default) the ``components_`` vectors are divided + by ``n_samples`` times ``components_`` to ensure uncorrelated outputs + with unit component-wise variances. + Whitening will remove some information from the transformed signal + (the relative variance scales of the components) but can sometimes + improve the predictive accuracy of the downstream estimators by + making data respect some hard-wired assumptions. + sigmoid: (bool) whether to apply inverse sigmoid before transform. 
+ """ + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.agnostic = True #cfg.MODEL.SWINTS.AGNOSTIC + self.whiten = True #cfg.MODEL.SWINTS.WHITEN + self.sigmoid = True #cfg.MODEL.SWINTS.SIGMOID + self.dim_mask = cfg.MODEL.SWINTS.MASK_DIM + self.mask_size = 28 #cfg.MODEL.SWINTS.MASK_SIZE + + if self.agnostic: + self.components = nn.Parameter(torch.zeros(self.dim_mask, self.mask_size**2), requires_grad=False) + self.explained_variances = nn.Parameter(torch.zeros(self.dim_mask), requires_grad=False) + self.means = nn.Parameter(torch.zeros(self.mask_size**2), requires_grad=False) + else: + raise NotImplementedError + + def inverse_sigmoid(self, x): + """Apply the inverse sigmoid operation. + y = -ln(1-x/x) + """ + # In case of overflow + value_random = VALUE_MAX * torch.rand_like(x) + value_random = torch.where(value_random > VALUE_MIN, value_random, VALUE_MIN * torch.ones_like(x)) + x = torch.where(x > value_random, 1 - value_random, value_random) + # inverse sigmoid + y = -1 * torch.log((1 - x) / x) + return y + + def encoder(self, X): + """Apply dimensionality reduction to X. + X is projected on the first principal components previously extracted + from a training set. + Parameters + ---------- + X : Original features(tensor), shape (n_samples, n_features) + New data, where n_samples is the number of samples + and n_features is the number of features. + + Returns + ------- + X_transformed : Transformed features(tensor), shape (n_samples, n_features) + """ + assert X.shape[1] == self.mask_size**2, print("The original mask_size of input" + " should be equal to the supposed size.") + + if self.sigmoid: + X = self.inverse_sigmoid(X) + + if self.agnostic: + if self.means is not None: + X_transformed = X - self.means + X_transformed = torch.matmul(X_transformed, self.components.T) + if self.whiten: + X_transformed /= torch.sqrt(self.explained_variances) + else: + # TODO: The class-specific version has not implemented. + raise NotImplementedError + + return X_transformed + + def decoder(self, X, is_train=False): + """Transform data back to its original space. + In other words, return an input X_original whose transform would be X. + Parameters + ---------- + X : Encoded features(tensor), shape (n_samples, n_components) + New data, where n_samples is the number of samples + and n_components is the number of components. + + Returns + ------- + X_original original features(tensor), shape (n_samples, n_features) + """ + assert X.shape[1] == self.dim_mask, print("The dim of transformed data " + "should be equal to the supposed dim.") + + if self.agnostic: + if self.whiten: + components_ = self.components * torch.sqrt(self.explained_variances.unsqueeze(1)) + X_transformed = torch.matmul(X, components_) + if self.means is not None: + X_transformed = X_transformed + self.means + else: + # TODO: The class-specific version has not implemented. 
+ raise NotImplementedError + + if is_train: + pass + else: + if self.sigmoid: + X_transformed = torch.sigmoid(X_transformed) + else: + X_transformed = torch.clamp(X_transformed, min=0.01, max=0.99) + + return X_transformed diff --git a/src/sts/projects/SWINTS/swints/__init__.py b/src/sts/projects/SWINTS/swints/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db28125d64eea1bc95504d276182989a7452d688 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/__init__.py @@ -0,0 +1,3 @@ +from .config import add_SWINTS_config +from .swints import SWINTS +from .dataset_mapper import SWINTSDatasetMapper diff --git a/src/sts/projects/SWINTS/swints/__pycache__/FocalTransformer.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/FocalTransformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f4052edfbb7bbc83f5800441f8bf66d09c0dc08 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/FocalTransformer.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/MaskEncoding.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/MaskEncoding.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d382e02d0c8f944b40dc8c6aa1ed30344958821 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/MaskEncoding.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/__init__.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b06a9d78064fe232447d3468ed7a7b15aaf5735 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/config.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f964e427686e723bd7652ced139c8cd655c754f9 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/config.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/dataset_mapper.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..15c196be9a8863644c52ecae3c03ea971c087131 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/dataset_mapper.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/head.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0299c7273345f763d8c01f834b5bb848fc5c964 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/head.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/loss.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/loss.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b11a2c41e4fcbb6e1e4c7d1ec0b123ed15ca4d50 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/loss.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/rec_stage.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/rec_stage.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64710edcb46c8f9748f70e5041bdda4e111ae213 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/rec_stage.cpython-38.pyc differ 
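The encoder/decoder pair in `PCAMaskEncoding` is a PCA in logit space: masks are pushed through a (clamped) inverse sigmoid, centred, projected onto the learned components (optionally whitened), and decoded by the transposed projection followed by a sigmoid. Below is a standalone sketch of that round trip with randomly generated orthonormal components standing in for the learned statistics; sizes are illustrative and the numerical clamping done by `inverse_sigmoid` is omitted for brevity:

```python
import torch

torch.manual_seed(0)
K, F = 60, 28 * 28                                   # illustrative: 60-d code, 28x28 masks

# Stand-ins for the learned PCA statistics (normally loaded into the module's buffers).
components = torch.linalg.qr(torch.randn(F, K))[0].T  # (K, F), orthonormal rows
means = torch.zeros(F)
variances = torch.ones(K)                             # whitening becomes a no-op here

def encode(masks):                                    # masks: (N, F), values in (0, 1)
    x = torch.log(masks / (1 - masks))                # inverse sigmoid (logit)
    return (x - means) @ components.T / variances.sqrt()

def decode(codes):                                    # codes: (N, K)
    x = codes @ (components * variances.sqrt().unsqueeze(1)) + means
    return torch.sigmoid(x)

codes = torch.randn(4, K)
masks = decode(codes)             # masks whose logits lie exactly in the PCA subspace
assert torch.allclose(encode(masks), codes, atol=1e-3)
```

In the repository the real statistics presumably come from the `.npz` file pointed to by `MODEL.SWINTS.PATH_COMPONENTS` in the config shown further below; the sketch only reproduces the algebra of `encoder`/`decoder`.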
diff --git a/src/sts/projects/SWINTS/swints/__pycache__/roi_seq_predictors.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/roi_seq_predictors.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be1f53fbb675065c77ac38176ac686f512e5b483 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/roi_seq_predictors.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/swints.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/swints.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78eac0b1bebadebc928b78085aaaf813e7d41979 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/swints.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/__pycache__/transformer.cpython-38.pyc b/src/sts/projects/SWINTS/swints/__pycache__/transformer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3497b1194a0a2a6b25b873e3f5f4d1c46fa25c62 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/__pycache__/transformer.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/beam_search.py b/src/sts/projects/SWINTS/swints/beam_search.py new file mode 100644 index 0000000000000000000000000000000000000000..7e39f0223cfb3c6752c091a77da83812afaeee30 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/beam_search.py @@ -0,0 +1,95 @@ +import torch +from .topk import TopK + +class BeamNode(object): + def __init__(self, seq, state, score): + self.seq = seq + self.state = state + self.score = score + self.avg_score = score / len(seq) + + def __cmp__(self, other): + if self.avg_score == other.avg_score: + return 0 + elif self.avg_score < other.avg_score: + return -1 + else: + return 1 + + def __lt__(self, other): + return self.avg_score < other.avg_score + + def __eq__(self, other): + return self.avg_score == other.avg_score + +class BeamSearch(object): + """Class to generate sequences from an image-to-text model.""" + + def __init__(self, + decode_step, + eos, + beam_size=2, + max_seq_len=32): + self.decode_step = decode_step + self.eos = eos + self.beam_size = beam_size + self.max_seq_len = max_seq_len + + def beam_search(self, init_inputs, init_states): + # self.beam_size = 1 + batch_size = len(init_inputs) + part_seqs = [TopK(self.beam_size) for _ in range(batch_size)] + comp_seqs = [TopK(self.beam_size) for _ in range(batch_size)] + + # print(init_inputs.shape, init_states.shape) + words, scores, states = self.decode_step(init_inputs, init_states, k=self.beam_size) + for batch_id in range(batch_size): + for i in range(self.beam_size): + node = BeamNode([words[batch_id][i]], states[:, :, batch_id, :], scores[batch_id][i]) + part_seqs[batch_id].push(node) + + for t in range(self.max_seq_len - 1): + part_seq_list = [] + for p in part_seqs: + part_seq_list.append(p.extract()) + p.reset() + + inputs, states = [], [] + for seq_list in part_seq_list: + for node in seq_list: + inputs.append(node.seq[-1]) + states.append(node.state) + if len(inputs) == 0: + break + + inputs = torch.stack(inputs) + states = torch.stack(states, dim=2) + words, scores, states = self.decode_step(inputs, states, k=self.beam_size + 1) + + idx = 0 + for batch_id in range(batch_size): + for node in part_seq_list[batch_id]: + tmp_state = states[:, :, idx, :] + k = 0 + num_hyp = 0 + while num_hyp < self.beam_size: + word = words[idx][k] + tmp_seq = node.seq + [word] + tmp_score = node.score + scores[idx][k] + tmp_node = BeamNode(tmp_seq, tmp_state, tmp_score) + 
k += 1 + num_hyp += 1 + + if word == self.eos: + comp_seqs[batch_id].push(tmp_node) + num_hyp -= 1 + else: + part_seqs[batch_id].push(tmp_node) + idx += 1 + + for batch_id in range(batch_size): + if not comp_seqs[batch_id].size(): + comp_seqs[batch_id] = part_seqs[batch_id] + seqs = [seq_list.extract(sort=True)[0].seq for seq_list in comp_seqs] + seq_scores = [seq_list.extract(sort=True)[0].avg_score for seq_list in comp_seqs] + return seqs, seq_scores \ No newline at end of file diff --git a/src/sts/projects/SWINTS/swints/config.py b/src/sts/projects/SWINTS/swints/config.py new file mode 100644 index 0000000000000000000000000000000000000000..7b72b42444671a0670107e93eaafb78db2a9c815 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/config.py @@ -0,0 +1,80 @@ +from detectron2.config import CfgNode as CN + + +def add_SWINTS_config(cfg): + """ + Add config for SWINTS. + """ + cfg.MODEL.SWINTS = CN() + cfg.MODEL.SWINTS.NUM_CLASSES = 80 + cfg.MODEL.SWINTS.NUM_PROPOSALS = 300 + cfg.MODEL.SWINTS.TEST_NUM_PROPOSALS = 100 + + # RCNN Head. + cfg.MODEL.SWINTS.NHEADS = 8 + cfg.MODEL.SWINTS.DROPOUT = 0.0 + cfg.MODEL.SWINTS.DIM_FEEDFORWARD = 2048 + cfg.MODEL.SWINTS.ACTIVATION = 'relu' + cfg.MODEL.SWINTS.HIDDEN_DIM = 256 + cfg.MODEL.SWINTS.NUM_CLS = 3 + cfg.MODEL.SWINTS.NUM_REG = 3 + cfg.MODEL.SWINTS.NUM_MASK = 3 + cfg.MODEL.SWINTS.NUM_HEADS = 6 + + cfg.MODEL.SWINTS.MASK_DIM = 60 + + + # Dynamic Conv. + cfg.MODEL.SWINTS.NUM_DYNAMIC = 2 + cfg.MODEL.SWINTS.DIM_DYNAMIC = 64 + + # Recognition Head + cfg.MODEL.REC_HEAD = CN() + cfg.MODEL.REC_HEAD.BATCH_SIZE = 48 + cfg.MODEL.REC_HEAD.POOLER_RESOLUTION = (28,28) + cfg.MODEL.REC_HEAD.RESOLUTION = (32, 32) + cfg.MODEL.REC_HEAD.NUM_CLASSES = 107 + + # Loss. + cfg.MODEL.SWINTS.CLASS_WEIGHT = 2.0 + cfg.MODEL.SWINTS.GIOU_WEIGHT = 2.0 + cfg.MODEL.SWINTS.L1_WEIGHT = 5.0 + cfg.MODEL.SWINTS.REC_WEIGHT = 1.0 + cfg.MODEL.SWINTS.DEEP_SUPERVISION = True + cfg.MODEL.SWINTS.NO_OBJECT_WEIGHT = 0.1 + cfg.MODEL.SWINTS.MASK_WEIGHT = 2.0 + + # Focal Loss. + cfg.MODEL.SWINTS.ALPHA = 0.25 + cfg.MODEL.SWINTS.GAMMA = 2.0 + cfg.MODEL.SWINTS.PRIOR_PROB = 0.01 + + # Optimizer. 
+ cfg.SOLVER.OPTIMIZER = "ADAMW" + cfg.SOLVER.BACKBONE_MULTIPLIER = 1.0 + + # Matcher + cfg.MODEL.SWINTS.IOU_THRESHOLDS = [0.5] + cfg.MODEL.SWINTS.IOU_LABELS = [0, 1] + + # Encoder + cfg.MODEL.SWINTS.PATH_COMPONENTS = "./src/sts/projects/SWINTS/LME/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz" + + # SWINT backbone + cfg.MODEL.SWINT = CN() + cfg.MODEL.SWINT.EMBED_DIM = 96 + cfg.MODEL.SWINT.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] + cfg.MODEL.SWINT.DEPTHS = [2, 2, 6, 2] + cfg.MODEL.SWINT.NUM_HEADS = [3, 6, 12, 24] + cfg.MODEL.SWINT.WINDOW_SIZE = 7 + cfg.MODEL.SWINT.MLP_RATIO = 4 + cfg.MODEL.SWINT.DROP_PATH_RATE = 0.2 + cfg.MODEL.SWINT.APE = False + cfg.MODEL.BACKBONE.FREEZE_AT = -1 + + # addation + cfg.MODEL.FPN.TOP_LEVELS = 2 + + # Test config + cfg.TEST.USE_NMS_IN_TSET = True + cfg.TEST.INFERENCE_TH_TEST = 0.4 \ No newline at end of file diff --git a/src/sts/projects/SWINTS/swints/dataset_mapper.py b/src/sts/projects/SWINTS/swints/dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..f8d6af13294c86fb5e1dc21c3680b1e9595f87aa --- /dev/null +++ b/src/sts/projects/SWINTS/swints/dataset_mapper.py @@ -0,0 +1,142 @@ +import copy +import logging + +import numpy as np +import torch + +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.data.transforms import TransformGen +from detectron2.structures import BoxMode +from PIL import Image + +__all__ = ["SWINTSDatasetMapper"] + + +def build_transform_gen(cfg, is_train): + """ + Create a list of :class:`TransformGen` from config. + Returns: + list[TransformGen] + """ + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + sample_style = "choice" + if sample_style == "range": + assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format(len(min_size)) + + logger = logging.getLogger(__name__) + tfm_gens = [] + tfm_gens.append(T.RandomBrightness(0.5,2)) + tfm_gens.append(T.RandomContrast(0.5,2)) + tfm_gens.append(T.RandomSaturation(0.5,2)) + tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) + if is_train: + logger.info("TransformGens used in training: " + str(tfm_gens)) + return tfm_gens + +@torch.no_grad() +class SWINTSDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by SparseRCNN. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. 
Prepare image and annotation to Tensors + """ + + def __init__(self, cfg, is_train=True): + if cfg.INPUT.CROP.ENABLED and is_train: + self.crop_gen = [ + #T.ResizeShortestEdge([400, 500, 600], sample_style="choice"), + #T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE), + T.RandomCropWithInstance( + cfg.INPUT.CROP.TYPE, + cfg.INPUT.CROP.SIZE, + cfg.INPUT.CROP.CROP_INSTANCE + ) + ] + self.rotate_gen = [ + T.RandomRotation(angle=[-90,90],sample_style="range") + ] + else: + self.crop_gen = None + self.tfm_gens = build_transform_gen(cfg, is_train) + logging.getLogger(__name__).info( + "Full TransformGens used in training: {}, crop: {}".format(str(self.tfm_gens), str(self.crop_gen)) + ) + + self.img_format = cfg.INPUT.FORMAT + self.is_train = is_train + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + boxes = np.asarray( + [ + BoxMode.convert( + instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS + ) + for instance in dataset_dict["annotations"] + ] + ) + augmentation = [] + if self.crop_gen is None: + image, transforms = T.apply_transform_gens(self.tfm_gens, image) + else: + if np.random.rand() > 0.5: + augmentation = self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:] + else: + augmentation = self.tfm_gens + if np.random.rand() > 0.5: + augmentation = augmentation[:-1] + self.rotate_gen + augmentation[-1:] + aug_input = T.StandardAugInput(image, boxes=boxes) + transforms = aug_input.apply_augmentations(augmentation) + image = aug_input.image + + image_shape = image.shape[:2] # h, w + # print(image_shape) + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + return dataset_dict + + if "annotations" in dataset_dict: + # USER: Modify this if you want to keep them for some reason. 
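The mapper composes its augmentations probabilistically: the crop and rotation generators are each spliced into the base list with probability 0.5, always keeping `ResizeShortestEdge` as the final transform. A rough sketch of that composition using stock detectron2 transforms (assumes detectron2 is installed; `T.RandomCrop` stands in for the project's `RandomCropWithInstance`, and the image, box, and size values are made up for illustration):

```python
import numpy as np
from detectron2.data import transforms as T

base_gens = [
    T.RandomBrightness(0.5, 2),
    T.RandomContrast(0.5, 2),
    T.RandomSaturation(0.5, 2),
    T.ResizeShortestEdge(800, max_size=1333, sample_style="choice"),
]
crop_gen = [T.RandomCrop("relative_range", (0.9, 0.9))]   # stand-in for RandomCropWithInstance
rotate_gen = [T.RandomRotation(angle=[-90, 90], sample_style="range")]

augs = list(base_gens)
if np.random.rand() > 0.5:                 # optionally crop, keeping the resize last
    augs = augs[:-1] + crop_gen + augs[-1:]
if np.random.rand() > 0.5:                 # optionally rotate, keeping the resize last
    augs = augs[:-1] + rotate_gen + augs[-1:]

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # dummy HxWx3 image
boxes = np.array([[50.0, 60.0, 200.0, 220.0]], dtype=np.float32)  # dummy XYXY box

aug_input = T.AugInput(image, boxes=boxes)  # the mapper uses the older StandardAugInput alias
tfms = aug_input.apply_augmentations(augs)
print(aug_input.image.shape, aug_input.boxes)
```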
+ for anno in dataset_dict["annotations"]: + # anno.pop("segmentation", None) + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations(obj, transforms, image_shape) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances(annos, image_shape) + dataset_dict["instances"] = utils.filter_empty_instances(instances) + return dataset_dict diff --git a/src/sts/projects/SWINTS/swints/head.py b/src/sts/projects/SWINTS/swints/head.py new file mode 100644 index 0000000000000000000000000000000000000000..7540b076dc21797f0e8af321c2e0800a57e2342e --- /dev/null +++ b/src/sts/projects/SWINTS/swints/head.py @@ -0,0 +1,493 @@ +import copy +import math +from typing import Optional, List + +import torch +from torch import nn, Tensor +import torch.nn.functional as F + +from detectron2.modeling.poolers import ROIPooler, cat +from detectron2.structures import Boxes, pairwise_iou + +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, get_norm + +from detectron2.modeling.matcher import Matcher +from .rec_stage import REC_STAGE + +_DEFAULT_SCALE_CLAMP = math.log(100000.0 / 16) + +def _get_src_permutation_idx(indices): +# permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + +def _get_tgt_permutation_idx(indices): +# permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + +class DynamicHead(nn.Module): + + def __init__(self, cfg, roi_input_shape): + super().__init__() + + # Build RoI. + box_pooler = self._init_box_pooler(cfg, roi_input_shape) + self.box_pooler = box_pooler + box_pooler_rec = self._init_box_pooler_rec(cfg, roi_input_shape) + self.box_pooler_rec = box_pooler_rec + + # Build heads. + num_classes = cfg.MODEL.SWINTS.NUM_CLASSES + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + dim_feedforward = cfg.MODEL.SWINTS.DIM_FEEDFORWARD + nhead = cfg.MODEL.SWINTS.NHEADS + dropout = cfg.MODEL.SWINTS.DROPOUT + activation = cfg.MODEL.SWINTS.ACTIVATION + self.train_num_proposal = cfg.MODEL.SWINTS.NUM_PROPOSALS + self.num_heads = cfg.MODEL.SWINTS.NUM_HEADS + rcnn_head = RCNNHead(cfg, self.hidden_dim, num_classes, dim_feedforward, nhead, dropout, activation) + self.head_series = _get_clones(rcnn_head, self.num_heads) + self.return_intermediate = cfg.MODEL.SWINTS.DEEP_SUPERVISION + + self.cfg =cfg + + # Build recognition heads + self.rec_stage = REC_STAGE(cfg, self.hidden_dim, num_classes, dim_feedforward, nhead, dropout, activation) + self.cnn = nn.Sequential( + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,1), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,1), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + ) + + #DC + self.conv = nn.ModuleList([ + nn.Sequential( + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,2,2), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,4,4), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True), + nn.Conv2d(self.hidden_dim, self.hidden_dim,3,1,1), + nn.BatchNorm2d(self.hidden_dim), + nn.ReLU(True),) + for i in range(4) + ] + ) + + + # Init parameters. 
+ self.num_classes = num_classes + prior_prob = cfg.MODEL.SWINTS.PRIOR_PROB + self.bias_value = -math.log((1 - prior_prob) / prior_prob) + self._reset_parameters() + + def _reset_parameters(self): + # init all parameters. + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + # initialize the bias for focal loss. + if p.shape[-1] == self.num_classes: + nn.init.constant_(p, self.bias_value) + + @staticmethod + def _init_box_pooler(cfg, input_shape): + + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + return box_pooler + @staticmethod + def _init_box_pooler_rec(cfg, input_shape): + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.REC_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales= pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + return box_pooler + + def extra_rec_feat(self, matcher, mask_encoding, targets, N, bboxes, class_logits, pred_bboxes, mask_logits, proposal_features, features): + gt_masks = list() + gt_boxes = list() + proposal_boxes_pred = list() + masks_pred = list() + pred_mask = mask_logits.detach() + + N, nr_boxes = bboxes.shape[:2] + if targets: + output = {'pred_logits': class_logits, 'pred_boxes': pred_bboxes, 'pred_masks': mask_logits} + indices = matcher(output, targets, mask_encoding) + idx = _get_src_permutation_idx(indices) + target_rec = torch.cat([t['rec'][i] for t, (_, i) in zip(targets, indices)], dim=0) + target_rec = target_rec.repeat(2,1) + else: + idx = None + scores = torch.sigmoid(class_logits) + labels = torch.arange(2, device=bboxes.device).\ + unsqueeze(0).repeat(self.train_num_proposal, 1).flatten(0, 1) + inter_class_logits = [] + inter_pred_bboxes = [] + inter_pred_masks = [] + inter_pred_label = [] + for b in range(N): + if targets: + gt_boxes.append(Boxes(targets[b]['boxes_xyxy'][indices[b][1]])) + gt_masks.append(targets[b]['gt_masks'][indices[b][1]]) + proposal_boxes_pred.append(Boxes(bboxes[b][indices[b][0]])) + tmp_mask = mask_encoding.decoder(pred_mask[b]).view(-1,28,28) + tmp_mask = tmp_mask[indices[b][0]] + tmp_mask2 = torch.full_like(tmp_mask,0).cuda() + tmp_mask2[tmp_mask>0.4]=1 + masks_pred.append(tmp_mask2) + else: + # post_processing + num_proposals = self.cfg.MODEL.SWINTS.TEST_NUM_PROPOSALS + scores_per_image, 
topk_indices = scores[b].flatten(0, 1).topk(num_proposals, sorted=False) + labels_per_image = labels[topk_indices] + box_pred_per_image = bboxes[b].view(-1, 1, 4).repeat(1, 2, 1).view(-1, 4) + box_pred_per_image = box_pred_per_image[topk_indices] + mask_pred_per_image = mask_logits.view(-1, self.cfg.MODEL.SWINTS.MASK_DIM) + mask_pred_per_image = mask_encoding.decoder(mask_pred_per_image, is_train=False) + mask_pred_per_image = mask_pred_per_image.view(-1, 1, 28, 28) + n, c, w, h = mask_pred_per_image.size() + mask_pred_per_image = torch.repeat_interleave(mask_pred_per_image,2,1).view(-1, c, w, h) + mask_pred_per_image = mask_pred_per_image[topk_indices] + proposal_features = proposal_features[b].view(-1, 1, self.hidden_dim).repeat(1, 2, 1).view(-1, self.hidden_dim) + proposal_features = proposal_features[topk_indices] + proposal_boxes_pred.append(Boxes(box_pred_per_image)) + gt_masks.append(mask_pred_per_image) + inter_class_logits.append(scores_per_image) + inter_pred_bboxes.append(box_pred_per_image) + inter_pred_masks.append(mask_pred_per_image) + inter_pred_label.append(labels_per_image) + + # get recognition roi region + if targets: + gt_roi_features = self.box_pooler_rec(features, gt_boxes) + pred_roi_features = self.box_pooler_rec(features, proposal_boxes_pred) + masks_pred = torch.cat(masks_pred).cuda() + gt_masks = torch.cat(gt_masks).cuda() + rec_map = torch.cat((gt_roi_features,pred_roi_features),0) + gt_masks = torch.cat((gt_masks,masks_pred),0) + else: + rec_map = self.box_pooler_rec(features, proposal_boxes_pred) + gt_masks = torch.cat(gt_masks).cuda() + nr_boxes = rec_map.shape[0] + if targets: + rec_map = rec_map[:self.cfg.MODEL.REC_HEAD.BATCH_SIZE] + else: + gt_masks_b = torch.full_like(gt_masks,0).cuda() + gt_masks_b[gt_masks>0.4]=1 + gt_masks_b = gt_masks_b.squeeze() + gt_masks = gt_masks_b + del gt_masks_b + if targets: + return proposal_features, gt_masks[:self.cfg.MODEL.REC_HEAD.BATCH_SIZE], idx, rec_map, target_rec[:self.cfg.MODEL.REC_HEAD.BATCH_SIZE] + else: + return inter_class_logits, inter_pred_bboxes, inter_pred_masks, inter_pred_label, proposal_features, gt_masks, idx, rec_map, nr_boxes + + def forward(self, features, init_bboxes, init_features, targets = None, mask_encoding = None, matcher=None): + + inter_class_logits = [] + inter_pred_bboxes = [] + inter_pred_masks = [] + inter_pred_label = [] + + bs = len(features[0]) + bboxes = init_bboxes + proposal_features = init_features.clone() + for i_idx in range(len(features)): + features[i_idx] = self.conv[i_idx](features[i_idx]) + features[i_idx] + for i, rcnn_head in enumerate(self.head_series): + + class_logits, pred_bboxes, proposal_features, mask_logits = rcnn_head(features, bboxes, proposal_features, self.box_pooler) + if self.return_intermediate: + inter_class_logits.append(class_logits) + inter_pred_bboxes.append(pred_bboxes) + inter_pred_masks.append(mask_logits) + bboxes = pred_bboxes.detach() + + # extract recognition feature. 
+ N, nr_boxes = bboxes.shape[:2] + if targets: + proposal_features, gt_masks, idx, rec_map, target_rec = \ + self.extra_rec_feat(matcher, mask_encoding, targets, N, bboxes, class_logits, pred_bboxes, mask_logits, proposal_features, features) + else: + inter_class_logits, inter_pred_bboxes, inter_pred_masks, inter_pred_label, proposal_features, gt_masks, idx, rec_map, nr_boxes = \ + self.extra_rec_feat(matcher, mask_encoding, targets, N, bboxes, class_logits, pred_bboxes, mask_logits, proposal_features, features) + + rec_map = self.cnn(rec_map) + rec_proposal_features = proposal_features.clone() + + if targets: + rec_result = self.rec_stage(rec_map, rec_proposal_features, gt_masks, N, nr_boxes, idx, target_rec) + else: + rec_result = self.rec_stage(rec_map, rec_proposal_features, gt_masks, N, nr_boxes) + rec_result = torch.tensor(rec_result) + if self.return_intermediate: + return torch.stack(inter_class_logits), torch.stack(inter_pred_bboxes), torch.stack(inter_pred_masks), rec_result + return class_logits[None], pred_bboxes[None], mask_logits[None] + + +class RCNNHead(nn.Module): + + def __init__(self, cfg, d_model, num_classes, dim_feedforward=2048, nhead=8, dropout=0.1, activation="relu", + scale_clamp: float = _DEFAULT_SCALE_CLAMP, bbox_weights=(2.0, 2.0, 1.0, 1.0)): + super().__init__() + + self.d_model = d_model + + # dynamic. + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.inst_interact = DynamicConv(cfg) + + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + + self.activation = nn.ELU(inplace=True) + + # cls. + num_cls = cfg.MODEL.SWINTS.NUM_CLS + cls_module = list() + for _ in range(num_cls): + cls_module.append(nn.Linear(d_model, d_model, False)) + cls_module.append(nn.LayerNorm(d_model)) + cls_module.append(nn.ELU(inplace=True)) + self.cls_module = nn.ModuleList(cls_module) + + # reg. + num_reg = cfg.MODEL.SWINTS.NUM_REG + reg_module = list() + for _ in range(num_reg): + reg_module.append(nn.Linear(d_model, d_model, False)) + reg_module.append(nn.LayerNorm(d_model)) + reg_module.append(nn.ELU(inplace=True)) + self.reg_module = nn.ModuleList(reg_module) + + # mask. + num_mask = cfg.MODEL.SWINTS.NUM_MASK + mask_module = list() + for _ in range(num_mask): + mask_module.append(nn.Linear(d_model, d_model, False)) + mask_module.append(nn.LayerNorm(d_model)) + mask_module.append(nn.ELU(inplace=True)) + self.mask_module = nn.ModuleList(mask_module) + self.mask_logits = nn.Linear(d_model, cfg.MODEL.SWINTS.MASK_DIM) + + # pred. + self.class_logits = nn.Linear(d_model, num_classes) + self.bboxes_delta = nn.Linear(d_model, 4) + self.scale_clamp = scale_clamp + self.bbox_weights = bbox_weights + + + def forward(self, features, bboxes, pro_features, pooler): + """ + :param bboxes: (N, nr_boxes, 4) + :param pro_features: (N, nr_boxes, d_model) + """ + + N, nr_boxes = bboxes.shape[:2] + + # roi_feature. + proposal_boxes = list() + for b in range(N): + proposal_boxes.append(Boxes(bboxes[b])) + roi_features = pooler(features, proposal_boxes) + roi_features = roi_features.view(N * nr_boxes, self.d_model, -1).permute(2, 0, 1) + + # self_att. 
+ pro_features = pro_features.view(N, nr_boxes, self.d_model).permute(1, 0, 2) + pro_features2 = self.self_attn(pro_features, pro_features, value=pro_features)[0] + pro_features = pro_features + self.dropout1(pro_features2) + + del pro_features2 + + pro_features = self.norm1(pro_features) + + # inst_interact. + pro_features = pro_features.view(nr_boxes, N, self.d_model).permute(1, 0, 2).reshape(1, N * nr_boxes, self.d_model) + pro_features2 = self.inst_interact(pro_features, roi_features) + pro_features = pro_features + self.dropout2(pro_features2) + + del pro_features2 + + obj_features = self.norm2(pro_features) + + # obj_feature. + obj_features2 = self.linear2(self.dropout(self.activation(self.linear1(obj_features)))) + obj_features = obj_features + self.dropout3(obj_features2) + + del obj_features2 + + obj_features = self.norm3(obj_features) + + fc_feature = obj_features.transpose(0, 1).reshape(N * nr_boxes, -1) + cls_feature = fc_feature.clone() + reg_feature = fc_feature.clone() + + mask_feature = fc_feature.clone() + + del fc_feature + + for mask_layer in self.mask_module: + mask_feature = mask_layer(mask_feature) + mask_logits = self.mask_logits(mask_feature) + del mask_feature + + for cls_layer in self.cls_module: + cls_feature = cls_layer(cls_feature) + for reg_layer in self.reg_module: + reg_feature = reg_layer(reg_feature) + class_logits = self.class_logits(cls_feature) + bboxes_deltas = self.bboxes_delta(reg_feature) + + del cls_feature + del reg_feature + + pred_bboxes = self.apply_deltas(bboxes_deltas, bboxes.view(-1, 4)) + + return class_logits.view(N, nr_boxes, -1), pred_bboxes.view(N, nr_boxes, -1), obj_features, mask_logits.view(N, nr_boxes, -1) + + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. + deltas[i] represents k potentially different class-specific + box transformations for the single box boxes[i]. 
+ boxes (Tensor): boxes to transform, of shape (N, 4) + """ + boxes = boxes.to(deltas.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.bbox_weights + dx = deltas[:, 0::4] / wx + dy = deltas[:, 1::4] / wy + dw = deltas[:, 2::4] / ww + dh = deltas[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1 + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2 + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2 + + return pred_boxes + + +class DynamicConv(nn.Module): + + def __init__(self, cfg): + super().__init__() + + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + self.dim_dynamic = cfg.MODEL.SWINTS.DIM_DYNAMIC + self.num_dynamic = cfg.MODEL.SWINTS.NUM_DYNAMIC + self.num_params = self.hidden_dim * self.dim_dynamic + self.dynamic_layer = nn.Linear(self.hidden_dim, self.num_dynamic * self.num_params) + + self.norm1 = nn.LayerNorm(self.dim_dynamic) + self.norm2 = nn.LayerNorm(self.hidden_dim) + + self.activation = nn.ELU(inplace=True) + + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + num_output = self.hidden_dim * pooler_resolution ** 2 + self.out_layer = nn.Linear(num_output, self.hidden_dim) + self.norm3 = nn.LayerNorm(self.hidden_dim) + + def forward(self, pro_features, roi_features): + ''' + pro_features: (1, N * nr_boxes, self.d_model) + roi_features: (49, N * nr_boxes, self.d_model) + ''' + features = roi_features.permute(1, 0, 2) + parameters = self.dynamic_layer(pro_features).permute(1, 0, 2) + + param1 = parameters[:, :, :self.num_params].view(-1, self.hidden_dim, self.dim_dynamic) + param2 = parameters[:, :, self.num_params:].view(-1, self.dim_dynamic, self.hidden_dim) + + del parameters + + features = torch.bmm(features, param1) + + del param1 + + features = self.norm1(features) + features = self.activation(features) + + features = torch.bmm(features, param2) + + del param2 + + features = self.norm2(features) + features = self.activation(features) + + features = features.flatten(1) + features = self.out_layer(features) + features = self.norm3(features) + features = self.activation(features) + + return features + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) diff --git a/src/sts/projects/SWINTS/swints/loss.py b/src/sts/projects/SWINTS/swints/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..a0edff08e5a2a825e01a1ef7fc6d995cb98c210a --- /dev/null +++ b/src/sts/projects/SWINTS/swints/loss.py @@ -0,0 +1,227 @@ +import torch +import torch.nn.functional as F +from torch import nn +from fvcore.nn import sigmoid_focal_loss_jit + +from .util import box_ops +from .util.misc import (NestedTensor, nested_tensor_from_tensor_list, + accuracy, get_world_size, interpolate, + is_dist_avail_and_initialized) +from .util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou + +from scipy.optimize import linear_sum_assignment + + +class SetCriterion(nn.Module): + def __init__(self, cfg, num_classes, matcher, weight_dict, eos_coef, 
losses): + super().__init__() + self.cfg = cfg + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.eos_coef = eos_coef + self.losses = losses + self.cfg = cfg + + self.focal_loss_alpha = cfg.MODEL.SWINTS.ALPHA + self.focal_loss_gamma = cfg.MODEL.SWINTS.GAMMA + + def loss_labels(self, outputs, targets, indices, num_boxes, mask_encoding): + assert 'pred_logits' in outputs + src_logits = outputs['pred_logits'] + + idx = self._get_src_permutation_idx(indices) + target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + target_classes[idx] = target_classes_o + + src_logits = src_logits.flatten(0, 1) + + target_classes = target_classes.flatten(0, 1) + pos_inds = torch.nonzero(target_classes != self.num_classes, as_tuple=True)[0] + labels = torch.zeros_like(src_logits) + labels[pos_inds, target_classes[pos_inds]] = 1 + + class_loss = sigmoid_focal_loss_jit( + src_logits, + labels, + alpha=self.focal_loss_alpha, + gamma=self.focal_loss_gamma, + reduction="sum", + ) / num_boxes + losses = {'loss_ce': class_loss} + + return losses + + + def loss_boxes(self, outputs, targets, indices, num_boxes, mask_encoding): + assert 'pred_boxes' in outputs + idx = self._get_src_permutation_idx(indices) + src_boxes = outputs['pred_boxes'][idx] + target_boxes = torch.cat([t['boxes_xyxy'][i] for t, (_, i) in zip(targets, indices)], dim=0) + + losses = {} + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou(src_boxes, target_boxes)) + losses['loss_giou'] = loss_giou.sum() / num_boxes + + image_size = torch.cat([v["image_size_xyxy_tgt"] for v in targets]) + src_boxes_ = src_boxes / image_size + target_boxes_ = target_boxes / image_size + + loss_bbox = F.l1_loss(src_boxes_, target_boxes_, reduction='none') + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + + return losses + + def loss_masks(self, outputs, targets, indices, num_boxes, mask_encoding): + assert 'pred_masks' in outputs + idx = self._get_src_permutation_idx(indices) + src_masks_feat = outputs['pred_masks'][idx] + target_masks = torch.cat([t['gt_masks'][i] for t, (_, i) in zip(targets, indices)], dim=0) + mask_loss_func = nn.MSELoss(reduction="none") + + target_masks_feat = mask_encoding.encoder(target_masks.flatten(1)) + loss = mask_loss_func(src_masks_feat, target_masks_feat) + + losses = {} + losses['loss_feat'] = loss.sum() / num_boxes / self.cfg.MODEL.SWINTS.MASK_DIM + + eps = 1e-5 + src_masks = mask_encoding.decoder(src_masks_feat.flatten(1)) + n_inst = src_masks.size(0) + target_masks = target_masks.flatten(1) + intersection = (src_masks * target_masks).sum(dim=1) + union = (src_masks ** 2.0).sum(dim=1) + (target_masks ** 2.0).sum(dim=1) + eps + loss = 1. 
- (2 * intersection / union) + losses['loss_dice'] = loss.sum() / num_boxes + + return losses + def loss_rec(self, outputs, targets, indices, num_boxes, mask_encoding): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + src_rec = outputs['pred_rec'] + losses = {} + losses['loss_rec'] = src_rec + return losses + + def _get_src_permutation_idx(self, indices): + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx + + def _get_tgt_permutation_idx(self, indices): + # permute targets following indices + batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) + tgt_idx = torch.cat([tgt for (_, tgt) in indices]) + return batch_idx, tgt_idx + + def get_loss(self, loss, outputs, targets, indices, num_boxes, mask_encoding, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'boxes': self.loss_boxes, + 'masks': self.loss_masks, + 'rec': self.loss_rec + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, targets, indices, num_boxes, mask_encoding, **kwargs) + + def forward(self, outputs, targets, mask_encoding): + outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'} + + # Retrieve the matching between the outputs of the last layer and the targets + indices = self.matcher(outputs_without_aux, targets, mask_encoding) + + # Compute the average number of target boxes accross all nodes, for normalization purposes + num_boxes = sum(len(t["labels"]) for t in targets) + num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + + # Compute all the requested losses + losses = {} + for loss in self.losses: + losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes, mask_encoding)) + + # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. + if 'aux_outputs' in outputs: + for i, aux_outputs in enumerate(outputs['aux_outputs']): + indices = self.matcher(aux_outputs, targets, mask_encoding) + for loss in self.losses: + # if loss == 'masks': + # # Intermediate masks losses are too costly to compute, we ignore them. 
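+                    # The recognition loss is likewise evaluated only on the final layer's output,
+                    # so it is skipped below for the auxiliary layers.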
+ # continue + if loss == 'rec': + continue + kwargs = {} + l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, mask_encoding, **kwargs) + l_dict = {k + f'_{i}': v for k, v in l_dict.items()} + losses.update(l_dict) + + return losses + + + +class HungarianMatcher(nn.Module): + def __init__(self, cfg, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1, cost_mask: float = 1): + super().__init__() + self.cost_class = cost_class + self.cost_bbox = cost_bbox + self.cost_giou = cost_giou + self.cost_mask = cost_mask + self.focal_loss_alpha = cfg.MODEL.SWINTS.ALPHA + self.focal_loss_gamma = cfg.MODEL.SWINTS.GAMMA + assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs cant be 0" + + @torch.no_grad() + def forward(self, outputs, targets, mask_encoding): + bs, num_queries = outputs["pred_logits"].shape[:2] + + out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() # [batch_size * num_queries, num_classes] + out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] + + # Also concat the target labels and boxes + tgt_ids = torch.cat([v["labels"] for v in targets]) + tgt_bbox = torch.cat([v["boxes_xyxy"] for v in targets]) + + + alpha = self.focal_loss_alpha + gamma = self.focal_loss_gamma + neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) + pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) + cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] + + # Compute the L1 cost between boxes + image_size_out = torch.cat([v["image_size_xyxy"].unsqueeze(0) for v in targets]) + image_size_out = image_size_out.unsqueeze(1).repeat(1, num_queries, 1).flatten(0, 1) + image_size_tgt = torch.cat([v["image_size_xyxy_tgt"] for v in targets]) + + out_bbox_ = out_bbox / image_size_out + tgt_bbox_ = tgt_bbox / image_size_tgt + cost_bbox = torch.cdist(out_bbox_, tgt_bbox_, p=1) + + # Compute the giou cost betwen boxes + # cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)) + cost_giou = -generalized_box_iou(out_bbox, tgt_bbox) + + # mask loss + tgt_mask = torch.cat([v["gt_masks"] for v in targets]).flatten(1) + tgt_mask_feat = mask_encoding.encoder(tgt_mask) + out_mask_feat = outputs["pred_masks"].flatten(0, 1).flatten(1) + + tgt_mask_feat = nn.functional.normalize(tgt_mask_feat, p=2) + out_mask_feat = nn.functional.normalize(out_mask_feat, p=2) + + # cost_mask = -torch.mm(out_mask, tgt_mask.T) + cost_mask = -(torch.mm(out_mask_feat, tgt_mask_feat.T) + 1.0) / 2.0 + + # Final cost matrix + C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou + self.cost_mask * cost_mask + C = C.view(bs, num_queries, -1).cpu() + + sizes = [len(v["boxes"]) for v in targets] + indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] + return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] diff --git a/src/sts/projects/SWINTS/swints/rec_stage.py b/src/sts/projects/SWINTS/swints/rec_stage.py new file mode 100644 index 0000000000000000000000000000000000000000..66ad1c23f652e5bdbdf16aa2e73eecabd5650237 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/rec_stage.py @@ -0,0 +1,207 @@ +import torch +from torch import nn, Tensor +from .FocalTransformer import FocalTransformerBlock +from .transformer import PositionalEncoding +from .roi_seq_predictors import SequencePredictor + +class DynamicConv_v2(nn.Module): + + def __init__(self, cfg): + 
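+        # Dynamic interaction used by the recognition stage: two per-proposal weight matrices
+        # are predicted from pro_features by dynamic_layer and applied to the RoI features with
+        # bmm (hidden_dim -> dim_dynamic -> hidden_dim), each step followed by LayerNorm and ELU.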
super().__init__() + + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + self.dim_dynamic = cfg.MODEL.SWINTS.DIM_DYNAMIC + self.num_dynamic = cfg.MODEL.SWINTS.NUM_DYNAMIC + self.num_params = self.hidden_dim * self.dim_dynamic + self.dynamic_layer = nn.Linear(self.hidden_dim, self.num_dynamic * self.num_params) + + + self.norm1 = nn.LayerNorm(self.dim_dynamic) + self.norm2 = nn.LayerNorm(self.hidden_dim) + + self.activation = nn.ELU(inplace=True) + + def forward(self, pro_features, roi_features): + ''' + pro_features: (1, N * nr_boxes, self.d_model) + roi_features: (rec_resolution, N * nr_boxes, self.d_model) + ''' + features = roi_features.permute(1, 0, 2) + parameters = self.dynamic_layer(pro_features).permute(1, 0, 2) + + param1 = parameters[:, :, :self.num_params].view(-1, self.hidden_dim, self.dim_dynamic) + param2 = parameters[:, :, self.num_params:].view(-1, self.dim_dynamic, self.hidden_dim) + del parameters + + features = torch.bmm(features, param1) + + del param1 + features = self.norm1(features) + features = self.activation(features) + + features = torch.bmm(features, param2) + + del param2 + + features = self.norm2(features) + features = self.activation(features) + + return features + +class REC_STAGE(nn.Module): + + def __init__(self, cfg, d_model, num_classes, dim_feedforward=2048, nhead=8, dropout=0.2, activation="relu"): + super().__init__() + + self.d_model = d_model + + # dynamic. + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + self.inst_interact = DynamicConv_v2(cfg) + + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + + self.activation = nn.ELU(inplace=True) + + self.feat_size = cfg.MODEL.REC_HEAD.POOLER_RESOLUTION + self.rec_batch_size = cfg.MODEL.REC_HEAD.BATCH_SIZE + self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=4) + self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=3) + + self.TLSAM = nn.Sequential( + FocalTransformerBlock(dim=256, input_resolution=self.feat_size, num_heads=8, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.2, + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="fc", + focal_level=2, focal_window=3, use_layerscale=False, layerscale_value=1e-4), + FocalTransformerBlock(dim=256, input_resolution=self.feat_size, num_heads=8, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.2, + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="fc", + focal_level=2, focal_window=3, use_layerscale=False, layerscale_value=1e-4),FocalTransformerBlock(dim=256, input_resolution=self.feat_size, num_heads=8, window_size=7, expand_size=0, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.2, + act_layer=nn.GELU, norm_layer=nn.LayerNorm, pool_method="fc", + focal_level=2, focal_window=3, use_layerscale=False, layerscale_value=1e-4) + ) + + self.pos_encoder = PositionalEncoding(self.d_model, max_len=(self.feat_size[0]//4)*(self.feat_size[1]//4)) + num_channels = d_model + in_channels = d_model + mode = 'nearest' + self.k_encoder = nn.Sequential( + encoder_layer(num_channels, 
num_channels, s=(2, 2)), + encoder_layer(num_channels, num_channels, s=(2, 2)) + ) + self.k_decoder_det = nn.Sequential( + decoder_layer_worelu(num_channels, num_channels, scale_factor=2, mode=mode), + decoder_layer_worelu(num_channels, num_channels, scale_factor=2, mode=mode), + decoder_layer(num_channels, in_channels, size=(self.feat_size[0], self.feat_size[1]), mode=mode) + ) + self.k_decoder_rec = nn.Sequential( + decoder_layer(num_channels, num_channels, scale_factor=2, mode=mode), + decoder_layer(num_channels, num_channels, scale_factor=2, mode=mode), + ) + + self.seq_decoder = SequencePredictor(cfg, d_model) + self.rescale = nn.Upsample(size=(self.feat_size[0], self.feat_size[1]), mode="bilinear", align_corners=False) + + def forward(self, roi_features, pro_features, gt_masks, N, nr_boxes, idx=None, targets=None): + """ + :param bboxes: (N, nr_boxes, 4) + :param pro_features: (N, nr_boxes, d_model) + """ + features = [] + k = roi_features + for i in range(0, len(self.k_encoder)): + k = self.k_encoder[i](k) + features.append(k) + n,c,h,w = k.size() + k = k.view(n, c, -1).permute(2, 0, 1) + # self_att. + pro_features = pro_features.view(N, nr_boxes, self.d_model).permute(1, 0, 2) + pro_features2 = self.self_attn(pro_features, pro_features, value=pro_features)[0] + pro_features = pro_features + self.dropout1(pro_features2) + + del pro_features2 + + pro_features = self.norm1(pro_features) + + # # inst_interact. + if idx: + pro_features = pro_features.permute(1, 0, 2)[idx] + pro_features = pro_features.repeat(2,1)[:self.rec_batch_size] + else: + pro_features = pro_features.permute(1, 0, 2) + pro_features = pro_features.reshape(1, -1, self.d_model) + pro_features2 = self.inst_interact(pro_features, k) + pro_features = k.permute(1,0,2) + self.dropout2(pro_features2) + + del pro_features2 + + obj_features = self.norm2(pro_features) + + # # obj_feature. 
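+        # Position-wise feed-forward with a residual connection:
+        # linear1 -> ELU -> dropout -> linear2, added back onto obj_features and then normalised.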
+ obj_features2 = self.linear2(self.dropout(self.activation(self.linear1(obj_features)))) + obj_features = obj_features + self.dropout3(obj_features2) + + del obj_features2 + obj_features = self.norm3(obj_features) + obj_features = obj_features.permute(1,0,2) + obj_features = self.pos_encoder(obj_features) + obj_features = self.transformer_encoder(obj_features) + obj_features = obj_features.permute(1,2,0) + n,c,w = obj_features.shape + obj_features = obj_features.view(n,c,self.feat_size[0]//4,self.feat_size[1]//4) + obj_features = obj_features + k = k.permute(1,2,0) + k = k.view(n,c,self.feat_size[0]//4,self.feat_size[1]//4) + k_rec = k*obj_features.sigmoid() + k_rec = self.k_decoder_rec[0](k_rec) + k_rec = k_rec + features[0] + + k_det = obj_features + k_det = self.k_decoder_det[0](k_det) + k_det = k_det + features[0] + k_rec = k_rec * k_det.sigmoid() + + k_rec = self.k_decoder_rec[1](k_rec) + roi_features + k_det = self.k_decoder_det[1](k_det) + roi_features + k_rec = k_rec * k_det.sigmoid() + + k_rec = self.k_decoder_det[-1](k_rec) + k_rec = k_rec.flatten(-2,-1).permute(0,2,1) + k_rec = self.TLSAM(k_rec) + k_rec = k_rec.permute(0,2,1).view(n,c,self.feat_size[0],self.feat_size[1]) + gt_masks = self.rescale(gt_masks.unsqueeze(1)) + k_rec = k_rec*gt_masks + attn_vecs = self.seq_decoder(k_rec, targets, targets) + return attn_vecs + +def encoder_layer(in_c, out_c, k=3, s=2, p=1): + return nn.Sequential(nn.Conv2d(in_c, out_c, k, s, p), + nn.BatchNorm2d(out_c), + nn.ReLU(True)) + +def decoder_layer(in_c, out_c, k=3, s=1, p=1, mode='nearest', scale_factor=None, size=None): + align_corners = None if mode=='nearest' else True + return nn.Sequential(nn.Upsample(size=size, scale_factor=scale_factor, + mode=mode, align_corners=align_corners), + nn.Conv2d(in_c, out_c, k, s, p), + nn.BatchNorm2d(out_c), + nn.ReLU(True)) + +def decoder_layer_worelu(in_c, out_c, k=3, s=1, p=1, mode='nearest', scale_factor=None, size=None): + align_corners = None if mode=='nearest' else True + return nn.Sequential(nn.Upsample(size=size, scale_factor=scale_factor, + mode=mode, align_corners=align_corners), + nn.Conv2d(in_c, in_c, k, s, p), + nn.BatchNorm2d(in_c), + nn.ReLU(True), + nn.Conv2d(in_c, out_c, k, s, p)) diff --git a/src/sts/projects/SWINTS/swints/roi_seq_predictors.py b/src/sts/projects/SWINTS/swints/roi_seq_predictors.py new file mode 100644 index 0000000000000000000000000000000000000000..565c1c681ecadf87f9b8b0927ea1e6e7a35bac3f --- /dev/null +++ b/src/sts/projects/SWINTS/swints/roi_seq_predictors.py @@ -0,0 +1,382 @@ +# Written by Minghui Liao +import math +import random + +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + +gpu_device = torch.device("cuda") +cpu_device = torch.device("cpu") + + +def reduce_mul(l): + out = 1.0 + for x in l: + out *= x + return out + + +def check_all_done(seqs): + for seq in seqs: + if not seq[-1]: + return False + return True + +def num2char(num): + CTLABELS = [' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/','0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','´', "~", "ˋ", "ˊ","﹒", "ˀ", "˜", "ˇ", "ˆ", "˒","‑"] + char = chars[num] + return char + +# TODO +class SequencePredictor(nn.Module): + def __init__(self,cfg, dim_in ): + super(SequencePredictor, 
self).__init__() + self.seq_encoder = nn.Sequential( + nn.Conv2d(dim_in, dim_in, 3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(2, stride=2, ceil_mode=True), + ) + self.MAX_LENGTH = 100 + RESIZE_WIDTH = cfg.MODEL.REC_HEAD.RESOLUTION[1] + RESIZE_HEIGHT = cfg.MODEL.REC_HEAD.RESOLUTION[0] + self.RESIZE_WIDTH = RESIZE_WIDTH + self.RESIZE_HEIGHT = RESIZE_HEIGHT + x_onehot_size = int(RESIZE_WIDTH / 2) + y_onehot_size = int(RESIZE_HEIGHT / 2) + self.num_class = cfg.MODEL.REC_HEAD.NUM_CLASSES + self.seq_decoder = BahdanauAttnDecoderRNN( + 256, self.num_class, self.num_class, n_layers=1, dropout_p=0.1, onehot_size = (y_onehot_size, x_onehot_size) + ) + # self.criterion_seq_decoder = nn.NLLLoss(ignore_index = -1, reduce=False) + self.criterion_seq_decoder = nn.NLLLoss(ignore_index=-1, reduction="none") + # self.rescale = nn.Upsample(size=(16, 64), mode="bilinear", align_corners=False) + self.rescale = nn.Upsample(size=(RESIZE_HEIGHT, RESIZE_WIDTH), mode="bilinear", align_corners=False) + + self.x_onehot = nn.Embedding(x_onehot_size, x_onehot_size) + self.x_onehot.weight.data = torch.eye(x_onehot_size) + self.y_onehot = nn.Embedding(y_onehot_size, y_onehot_size) + self.y_onehot.weight.data = torch.eye(y_onehot_size) + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + def forward( + self, x, decoder_targets=None, word_targets=None, use_beam_search=False + ): + rescale_out = self.rescale(x) + seq_decoder_input = self.seq_encoder(rescale_out) + x_onehot_size = int(self.RESIZE_WIDTH / 2) + y_onehot_size = int(self.RESIZE_HEIGHT / 2) + x_t, y_t = np.meshgrid(np.linspace(0, x_onehot_size - 1, x_onehot_size), np.linspace(0, y_onehot_size - 1, y_onehot_size)) + x_t = torch.LongTensor(x_t, device=cpu_device).cuda() + y_t = torch.LongTensor(y_t, device=cpu_device).cuda() + x_onehot_embedding = ( + self.x_onehot(x_t) + .transpose(0, 2) + .transpose(1, 2) + .repeat(seq_decoder_input.size(0), 1, 1, 1) + ) + y_onehot_embedding = ( + self.y_onehot(y_t) + .transpose(0, 2) + .transpose(1, 2) + .repeat(seq_decoder_input.size(0), 1, 1, 1) + ) + seq_decoder_input_loc = torch.cat( + [seq_decoder_input, x_onehot_embedding, y_onehot_embedding], 1 + ) + seq_decoder_input_reshape = ( + seq_decoder_input_loc.view( + seq_decoder_input_loc.size(0), seq_decoder_input_loc.size(1), -1 + ) + .transpose(0, 2) + .transpose(1, 2) + ) + if self.training: + bos_onehot = np.zeros( + (seq_decoder_input_reshape.size(1), 1), dtype=np.int32 + ) + bos_onehot[:, 0] = 0 + decoder_input = torch.tensor(bos_onehot.tolist(), device=gpu_device) + decoder_hidden = torch.zeros( + (seq_decoder_input_reshape.size(1), 256), device=gpu_device + ) + use_teacher_forcing = ( + True + if random.random() < 1 + else False + ) + target_length = decoder_targets.size(1) + if use_teacher_forcing: + # Teacher forcing: Feed the target as the next input + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, decoder_hidden, seq_decoder_input_reshape + ) + if di == 0: + loss_seq_decoder = self.criterion_seq_decoder( + decoder_output, word_targets[:, di] + ) + else: + loss_seq_decoder += self.criterion_seq_decoder( + decoder_output, word_targets[:, di] + ) + decoder_input = decoder_targets[:, di] # Teacher forcing + else: + # Without teacher forcing: use 
its own predictions as the next input + for di in range(target_length): + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, decoder_hidden, seq_decoder_input_reshape + ) + topv, topi = decoder_output.topk(1) + decoder_input = topi.squeeze( + 1 + ).detach() # detach from history as input + if di == 0: + loss_seq_decoder = self.criterion_seq_decoder( + decoder_output, word_targets[:, di] + ) + else: + loss_seq_decoder += self.criterion_seq_decoder( + decoder_output, word_targets[:, di] + ) + loss_seq_decoder = loss_seq_decoder.sum() / loss_seq_decoder.size(0) + loss_seq_decoder = 0.2 * loss_seq_decoder + return loss_seq_decoder + else: + words = [] + decoded_scores = [] + detailed_decoded_scores = [] + # real_length = 0 + if use_beam_search: + for batch_index in range(seq_decoder_input_reshape.size(1)): + decoder_hidden = torch.zeros((1, 256), device=gpu_device) + word = [] + char_scores = [] + detailed_char_scores = [] + top_seqs = self.beam_search( + seq_decoder_input_reshape[:, batch_index : batch_index + 1, :], + decoder_hidden, + beam_size=6, + max_len=self.MAX_LENGTH, + ) + top_seq = top_seqs[0] + for character in top_seq[1:]: + character_index = character[0] + if character_index == self.cfg.SEQUENCE.NUM_CHAR: + char_scores.append(character[1]) + detailed_char_scores.append(character[2]) + break + else: + if character_index == 0: + word.append("~") + char_scores.append(0.0) + else: + word.append(num2char(character_index)) + char_scores.append(character[1]) + detailed_char_scores.append(character[2]) + words.append("".join(word)) + decoded_scores.append(char_scores) + detailed_decoded_scores.append(detailed_char_scores) + else: + for batch_index in range(seq_decoder_input_reshape.size(1)): + bos_onehot = np.zeros((1, 1), dtype=np.int32) + bos_onehot[:, 0] = 0 + decoder_input = torch.tensor(bos_onehot.tolist(), device=gpu_device) + decoder_hidden = torch.zeros((1, 256), device=gpu_device) + word = [] + char_scores = [] + for di in range(self.MAX_LENGTH): + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, + decoder_hidden, + seq_decoder_input_reshape[ + :, batch_index : batch_index + 1, : + ], + ) + # decoder_attentions[di] = decoder_attention.data + topv, topi = decoder_output.data.topk(1) + char_scores.append(topv.item()) + if topi.item() == 0: + break + else: + if topi.item() == 0: + word.append(topi.item()) + else: + word.append(topi.item()) + + # real_length = di + decoder_input = topi.squeeze(1).detach() + tmp = np.zeros((self.MAX_LENGTH), dtype=np.int32) + tmp[:len(word)] = torch.tensor(word) + word = tmp + words.append(word) + decoded_scores.append(char_scores) + return words + + def beam_search_step(self, encoder_context, top_seqs, k): + all_seqs = [] + for seq in top_seqs: + seq_score = reduce_mul([_score for _, _score, _, _ in seq]) + if seq[-1][0] == self.cfg.SEQUENCE.NUM_CHAR - 1: + all_seqs.append((seq, seq_score, seq[-1][2], True)) + continue + decoder_hidden = seq[-1][-1][0] + onehot = np.zeros((1, 1), dtype=np.int32) + onehot[:, 0] = seq[-1][0] + decoder_input = torch.tensor(onehot.tolist(), device=gpu_device) + decoder_output, decoder_hidden, decoder_attention = self.seq_decoder( + decoder_input, decoder_hidden, encoder_context + ) + detailed_char_scores = decoder_output.cpu().numpy() + # print(decoder_output.shape) + scores, candidates = decoder_output.data[:, 1:].topk(k) + for i in range(k): + character_score = scores[:, i] + character_index = candidates[:, i] + score = seq_score * 
character_score.item() + char_score = seq_score * detailed_char_scores + rs_seq = seq + [ + ( + character_index.item() + 1, + character_score.item(), + char_score, + [decoder_hidden], + ) + ] + done = character_index.item() + 1 == 38 + all_seqs.append((rs_seq, score, char_score, done)) + all_seqs = sorted(all_seqs, key=lambda seq: seq[1], reverse=True) + topk_seqs = [seq for seq, _, _, _ in all_seqs[:k]] + all_done = check_all_done(all_seqs[:k]) + return topk_seqs, all_done + + def beam_search(self, encoder_context, decoder_hidden, beam_size=6, max_len=32): + char_score = np.zeros(self.cfg.SEQUENCE.NUM_CHAR) + top_seqs = [[(self.cfg.SEQUENCE.BOS_TOKEN, 1.0, char_score, [decoder_hidden])]] + # loop + for _ in range(max_len): + top_seqs, all_done = self.beam_search_step( + encoder_context, top_seqs, beam_size + ) + if all_done: + break + return top_seqs + + +class Attn(nn.Module): + def __init__(self, method, hidden_size, embed_size, onehot_size): + super(Attn, self).__init__() + self.method = method + self.hidden_size = hidden_size + self.embed_size = embed_size + self.attn = nn.Linear(2 * self.hidden_size + onehot_size, hidden_size) + # self.attn = nn.Linear(hidden_size, hidden_size) + self.v = nn.Parameter(torch.rand(hidden_size)) + stdv = 1.0 / math.sqrt(self.v.size(0)) + self.v.data.normal_(mean=0, std=stdv) + + def forward(self, hidden, encoder_outputs): + """ + :param hidden: + previous hidden state of the decoder, in shape (B, hidden_size) + :param encoder_outputs: + encoder outputs from Encoder, in shape (H*W, B, hidden_size) + :return + attention energies in shape (B, H*W) + """ + max_len = encoder_outputs.size(0) + # this_batch_size = encoder_outputs.size(1) + H = hidden.repeat(max_len, 1, 1).transpose(0, 1) # (B, H*W, hidden_size) + encoder_outputs = encoder_outputs.transpose(0, 1) # (B, H*W, hidden_size) + attn_energies = self.score( + H, encoder_outputs + ) # compute attention score (B, H*W) + return F.softmax(attn_energies, dim=1).unsqueeze( + 1 + ) # normalize with softmax (B, 1, H*W) + + def score(self, hidden, encoder_outputs): + energy = torch.tanh( + self.attn(torch.cat([hidden, encoder_outputs], 2)) + ) # (B, H*W, 2*hidden_size+H+W)->(B, H*W, hidden_size) + energy = energy.transpose(2, 1) # (B, hidden_size, H*W) + v = self.v.repeat(encoder_outputs.data.shape[0], 1).unsqueeze( + 1 + ) # (B, 1, hidden_size) + energy = torch.bmm(v, energy) # (B, 1, H*W) + return energy.squeeze(1) # (B, H*W) + + +class BahdanauAttnDecoderRNN(nn.Module): + def __init__( + self, + hidden_size, + embed_size, + output_size, + n_layers=1, + dropout_p=0, + bidirectional=False, + onehot_size = (8, 32) + ): + super(BahdanauAttnDecoderRNN, self).__init__() + # Define parameters + self.hidden_size = hidden_size + self.embed_size = embed_size + self.output_size = output_size + self.n_layers = n_layers + self.dropout_p = dropout_p + # Define layers + self.embedding = nn.Embedding(output_size, embed_size) + self.embedding.weight.data = torch.eye(embed_size) + # self.dropout = nn.Dropout(dropout_p) + self.word_linear = nn.Linear(embed_size, hidden_size) + self.attn = Attn("concat", hidden_size, embed_size, onehot_size[0] + onehot_size[1]) + self.rnn = nn.GRUCell(2 * hidden_size + onehot_size[0] + onehot_size[1], hidden_size) + self.out = nn.Linear(hidden_size, output_size) + + def forward(self, word_input, last_hidden, encoder_outputs): + """ + :param word_input: + word input for current time step, in shape (B) + :param last_hidden: + last hidden stat of the decoder, in shape (layers*direction*B, 
hidden_size) + :param encoder_outputs: + encoder outputs in shape (H*W, B, C) + :return + decoder output + """ + # Get the embedding of the current input word (last output word) + word_embedded_onehot = self.embedding(word_input).view( + 1, word_input.size(0), -1 + ) # (1,B,embed_size) + word_embedded = self.word_linear(word_embedded_onehot) # (1, B, hidden_size) + attn_weights = self.attn(last_hidden, encoder_outputs) # (B, 1, H*W) + context = attn_weights.bmm( + encoder_outputs.transpose(0, 1) + ) # (B, 1, H*W) * (B, H*W, C) = (B,1,C) + context = context.transpose(0, 1) # (1,B,C) + # Combine embedded input word and attended context, run through RNN + # 2 * hidden_size + W + H: 256 + 256 + 32 + 8 = 552 + rnn_input = torch.cat((word_embedded, context), 2) + last_hidden = last_hidden.view(last_hidden.size(0), -1) + rnn_input = rnn_input.view(word_input.size(0), -1) + hidden = self.rnn(rnn_input, last_hidden) + if not self.training: + output = F.softmax(self.out(hidden), dim=1) + else: + output = F.log_softmax(self.out(hidden), dim=1) + # Return final output, hidden state + # print(output.shape) + return output, hidden, attn_weights + + +def make_roi_seq_predictor(cfg, dim_in): + return SequencePredictor(cfg, dim_in) diff --git a/src/sts/projects/SWINTS/swints/swints.py b/src/sts/projects/SWINTS/swints/swints.py new file mode 100644 index 0000000000000000000000000000000000000000..180cf949523a0d6a703d008d03b0200e1be6caa3 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/swints.py @@ -0,0 +1,285 @@ +import logging +import math +from typing import List + +import numpy as np +import torch +import torch.distributed as dist +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import ShapeSpec +from detectron2.modeling import META_ARCH_REGISTRY, build_backbone, detector_postprocess +from detectron2.modeling.roi_heads import build_roi_heads + +from detectron2.structures import Boxes, ImageList, Instances +from detectron2.utils.logger import log_first_n +from fvcore.nn import giou_loss, smooth_l1_loss + +from .loss import SetCriterion, HungarianMatcher +from .head import DynamicHead +from .util.box_ops import box_cxcywh_to_xyxy, box_xyxy_to_cxcywh +from .util.misc import (NestedTensor, nested_tensor_from_tensor_list, + accuracy, get_world_size, interpolate, + is_dist_avail_and_initialized) + +from detectron2.layers import Conv2d, get_norm +from .MaskEncoding import PCAMaskEncoding +from detectron2.modeling.backbone import PatchEmbed + +__all__ = ["SWINTS"] + + +class ImgFeatExtractor(nn.Module): + def __init__(self, cfg): + super().__init__() + # self.img_feat_layer = nn.AdaptiveAvgPool2d(1) + self.cfg = cfg + + def forward(self, features): + for i, f in enumerate(features): + if i == 0: + x = torch.mean(torch.mean(f, -1), -1) #self.img_feat_layer(f) + else: + x_p = torch.mean(torch.mean(f, -1), -1) #self.img_feat_layer(f) + x = x + x_p + + img_feats = x.squeeze(-1).squeeze(-1).unsqueeze(1).repeat(1, self.cfg.MODEL.SWINTS.NUM_PROPOSALS, 1,) + + del x_p + del x + + return img_feats + + +@META_ARCH_REGISTRY.register() +class SWINTS(nn.Module): + + def __init__(self, cfg): + super().__init__() + + self.cfg = cfg + + self.device = torch.device(cfg.MODEL.DEVICE) + + self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + self.num_classes = cfg.MODEL.SWINTS.NUM_CLASSES + self.num_proposals = cfg.MODEL.SWINTS.NUM_PROPOSALS + self.hidden_dim = cfg.MODEL.SWINTS.HIDDEN_DIM + self.num_heads = cfg.MODEL.SWINTS.NUM_HEADS + + # Build Backbone. 
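+        # build_backbone returns the backbone defined in the config; its output feature maps
+        # (selected by cfg.MODEL.ROI_HEADS.IN_FEATURES) feed the dynamic head, and
+        # size_divisibility controls the padding applied in preprocess_image.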
+ self.backbone = build_backbone(cfg) + self.size_divisibility = self.backbone.size_divisibility + + # Build Proposals. + self.pos_embeddings = nn.Embedding(self.num_proposals, self.hidden_dim) + self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4) + nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5) + nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0) + + # -------- + self.IFE = ImgFeatExtractor(cfg) + self.mask_encoding = PCAMaskEncoding(cfg) + # encoding parameters. + components_path = cfg.MODEL.SWINTS.PATH_COMPONENTS + # update parameters. + parameters = np.load(components_path) + components = nn.Parameter(torch.from_numpy(parameters['components_c'][0]).float().to(self.device),requires_grad=False) + explained_variances = nn.Parameter(torch.from_numpy(parameters['explained_variance_c'][0]).float().to(self.device), requires_grad=False) + means = nn.Parameter(torch.from_numpy(parameters['mean_c'][0]).float().to(self.device),requires_grad=False) + self.mask_encoding.components = components + self.mask_encoding.explained_variances = explained_variances + self.mask_encoding.means = means + + # Build Dynamic Head. + self.head = DynamicHead(cfg=cfg, roi_input_shape=self.backbone.output_shape()) + + # Loss parameters: + class_weight = cfg.MODEL.SWINTS.CLASS_WEIGHT + giou_weight = cfg.MODEL.SWINTS.GIOU_WEIGHT + l1_weight = cfg.MODEL.SWINTS.L1_WEIGHT + rec_weight = cfg.MODEL.SWINTS.REC_WEIGHT + no_object_weight = cfg.MODEL.SWINTS.NO_OBJECT_WEIGHT + mask_weight = cfg.MODEL.SWINTS.MASK_WEIGHT + + self.deep_supervision = cfg.MODEL.SWINTS.DEEP_SUPERVISION + + # Build Criterion. + matcher = HungarianMatcher(cfg=cfg, + cost_class=class_weight, + cost_bbox=l1_weight, + cost_giou=giou_weight, + cost_mask=mask_weight) + self.matcher = matcher + weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight, "loss_feat": mask_weight, "loss_dice": mask_weight} + if self.deep_supervision: + aux_weight_dict = {} + for i in range(self.num_heads - 1): + aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) + weight_dict.update(aux_weight_dict) + weight_dict["loss_rec"] = rec_weight + losses = ["labels", "boxes", "masks", "rec"] + + self.criterion = SetCriterion(cfg=cfg, + num_classes=self.num_classes, + matcher=matcher, + weight_dict=weight_dict, + eos_coef=no_object_weight, + losses=losses) + + pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1) + pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1) + self.normalizer = lambda x: (x - pixel_mean) / pixel_std + self.to(self.device) + + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances: Instances + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + """ + images, images_whwh = self.preprocess_image(batched_inputs) + if isinstance(images, (list, torch.Tensor)): + images = nested_tensor_from_tensor_list(images) + + # Feature Extraction. + src = self.backbone(images.tensor) + + features = list() + for f in self.in_features: + feature = src[f] + features.append(feature) + + # Prepare Proposals. 
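+        # The learned proposals are stored as normalised (cx, cy, w, h); converting them to xyxy
+        # and multiplying by images_whwh gives absolute per-image boxes, e.g. the initial unit box
+        # (0.5, 0.5, 1.0, 1.0) on a 640x480 input becomes (0, 0, 640, 480).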
+ proposal_boxes = self.init_proposal_boxes.weight.clone() + proposal_boxes = box_cxcywh_to_xyxy(proposal_boxes) + proposal_boxes = proposal_boxes[None] * images_whwh[:, None, :] + + img_feats = self.IFE(features) + bs = len(features[0]) + pos_embeddings = self.pos_embeddings.weight[None].repeat(bs, 1, 1) + proposal_feats = img_feats + pos_embeddings + + del img_feats + if self.training: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + targets = self.prepare_targets(gt_instances) + outputs_class, outputs_coord, outputs_mask,out_rec = self.head(features, proposal_boxes, proposal_feats, targets, mask_encoding=self.mask_encoding, matcher=self.matcher) + output = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'pred_masks': outputs_mask[-1], 'pred_rec': out_rec} + if self.deep_supervision: + output['aux_outputs'] = [{'pred_logits': a, 'pred_boxes': b, 'pred_masks': c} + for a, b, c in zip(outputs_class[:-1], outputs_coord[:-1], outputs_mask[:-1])] + + loss_dict = self.criterion(output, targets, self.mask_encoding) + weight_dict = self.criterion.weight_dict + for k in loss_dict.keys(): + if k in weight_dict: + loss_dict[k] *= weight_dict[k] + return loss_dict + + else: + outputs_class, outputs_coord, outputs_mask,out_rec = self.head(features, proposal_boxes, proposal_feats, mask_encoding=self.mask_encoding) + output = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'pred_masks': outputs_mask[-1]} + box_cls = output["pred_logits"] + box_pred = output["pred_boxes"] + mask_pred = output["pred_masks"].unsqueeze(dim=2) + results = Instances(images.image_sizes[0]) + results.pred_boxes = Boxes(box_pred) + results.scores = box_cls + results.pred_masks = mask_pred.squeeze(1) + results.pred_rec = out_rec + results = [results] + processed_results = [] + for results_per_image, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + + return processed_results + + @torch.no_grad() + def prepare_targets(self, targets): + new_targets = [] + for targets_per_image in targets: + target = {} + h, w = targets_per_image.image_size + image_size_xyxy = torch.as_tensor([w, h, w, h], dtype=torch.float, device=self.device) + gt_classes = targets_per_image.gt_classes + gt_boxes = targets_per_image.gt_boxes.tensor / image_size_xyxy + gt_boxes = box_xyxy_to_cxcywh(gt_boxes) + target["labels"] = gt_classes.to(self.device) + target["boxes"] = gt_boxes.to(self.device) + target["boxes_xyxy"] = targets_per_image.gt_boxes.tensor.to(self.device) + target["image_size_xyxy"] = image_size_xyxy.to(self.device) + image_size_xyxy_tgt = image_size_xyxy.unsqueeze(0).repeat(len(gt_boxes), 1) + target["image_size_xyxy_tgt"] = image_size_xyxy_tgt.to(self.device) + target["area"] = targets_per_image.gt_boxes.area().to(self.device) + + target["gt_masks"] = targets_per_image.gt_masks.to(self.device) + masks = target['gt_masks'].crop_and_resize(targets_per_image.gt_boxes, 28) + target["gt_masks"] = masks.float() + target["rec"] = targets_per_image.rec.to(self.device) + new_targets.append(target) + + return new_targets + + @torch.no_grad() + def inference(self, box_cls, box_pred, mask_pred, image_sizes, recred): + """ + Arguments: + box_cls (Tensor): tensor of shape (batch_size, num_proposals, K). 
+ The tensor predicts the classification probability for each proposal. + box_pred (Tensor): tensors of shape (batch_size, num_proposals, 4). + The tensor predicts 4-vector (x,y,w,h) box + regression values for every proposal + image_sizes (List[torch.Size]): the input image sizes + + Returns: + results (List[Instances]): a list of #images elements. + """ + assert len(box_cls) == len(image_sizes) + results = [] + # + scores = torch.sigmoid(box_cls) + labels = torch.arange(self.num_classes, device=self.device).\ + unsqueeze(0).repeat(self.num_proposals, 1).flatten(0, 1) + for i, (scores_per_image, box_pred_per_image, mask_pred_per_image, image_size, rec_per_image) in enumerate(zip( + scores, box_pred, mask_pred, image_sizes, rec_pred + )): + result = Instances(image_size) + scores_per_image, topk_indices = scores_per_image.flatten(0, 1).topk(self.num_proposals, sorted=False) + labels_per_image = labels[topk_indices] + result.pred_boxes = Boxes(box_pred_per_image) + result.scores = scores_per_image + result.pred_classes = labels_per_image + result.pred_masks = mask_pred_per_image + result.pred_rec = rec_per_image + results.append(result) + return results + + def preprocess_image(self, batched_inputs): + """ + Normalize, pad and batch the input images. + """ + images = [self.normalizer(x["image"].to(self.device)) for x in batched_inputs] + images = ImageList.from_tensors(images, self.size_divisibility) + + images_whwh = list() + for bi in batched_inputs: + h, w = bi["image"].shape[-2:] + images_whwh.append(torch.tensor([w, h, w, h], dtype=torch.float32, device=self.device)) + images_whwh = torch.stack(images_whwh) + + return images, images_whwh diff --git a/src/sts/projects/SWINTS/swints/topk.py b/src/sts/projects/SWINTS/swints/topk.py new file mode 100644 index 0000000000000000000000000000000000000000..9b8ba74ca8bd7e2303f41ba22e3020cc50957755 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/topk.py @@ -0,0 +1,23 @@ +import heapq + +class TopK(object): + def __init__(self, k): + self.k = k + self.data = [] + + def reset(self): + self.data = [] + + def size(self): + return len(self.data) + + def push(self, x): + if len(self.data) < self.k: + heapq.heappush(self.data, x) + else: + heapq.heappushpop(self.data, x) + + def extract(self, sort=False): + if sort: + self.data.sort(reverse=True) + return self.data \ No newline at end of file diff --git a/src/sts/projects/SWINTS/swints/transformer.py b/src/sts/projects/SWINTS/swints/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..6dde312185c7c68f54562885f23ea3b0670e6c40 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/transformer.py @@ -0,0 +1,901 @@ +# pytorch 1.5.0 +import copy +import math +import warnings +from typing import Optional + +import torch +import torch.nn as nn +from torch import Tensor +from torch.nn import Dropout, LayerNorm, Linear, Module, ModuleList, Parameter +from torch.nn import functional as F +from torch.nn.init import constant_, xavier_uniform_ + + +def multi_head_attention_forward(query, # type: Tensor + key, # type: Tensor + value, # type: Tensor + embed_dim_to_check, # type: int + num_heads, # type: int + in_proj_weight, # type: Tensor + in_proj_bias, # type: Tensor + bias_k, # type: Optional[Tensor] + bias_v, # type: Optional[Tensor] + add_zero_attn, # type: bool + dropout_p, # type: float + out_proj_weight, # type: Tensor + out_proj_bias, # type: Tensor + training=True, # type: bool + key_padding_mask=None, # type: Optional[Tensor] + need_weights=True, # type: bool + attn_mask=None, 
# type: Optional[Tensor] + use_separate_proj_weight=False, # type: bool + q_proj_weight=None, # type: Optional[Tensor] + k_proj_weight=None, # type: Optional[Tensor] + v_proj_weight=None, # type: Optional[Tensor] + static_k=None, # type: Optional[Tensor] + static_v=None # type: Optional[Tensor] + ): + # type: (...) -> Tuple[Tensor, Optional[Tensor]] + r""" + Args: + query, key, value: map a query and a set of key-value pairs to an output. + See "Attention Is All You Need" for more details. + embed_dim_to_check: total dimension of the model. + num_heads: parallel attention heads. + in_proj_weight, in_proj_bias: input projection weight and bias. + bias_k, bias_v: bias of the key and value sequences to be added at dim=0. + add_zero_attn: add a new batch of zeros to the key and + value sequences at dim=1. + dropout_p: probability of an element to be zeroed. + out_proj_weight, out_proj_bias: the output projection weight and bias. + training: apply dropout if is ``True``. + key_padding_mask: if provided, specified padding elements in the key will + be ignored by the attention. This is an binary mask. When the value is True, + the corresponding value on the attention layer will be filled with -inf. + need_weights: output attn_output_weights. + attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all + the batches while a 3D mask allows to specify a different mask for the entries of each batch. + use_separate_proj_weight: the function accept the proj. weights for query, key, + and value in different forms. If false, in_proj_weight will be used, which is + a combination of q_proj_weight, k_proj_weight, v_proj_weight. + q_proj_weight, k_proj_weight, v_proj_weight, in_proj_bias: input projection weight and bias. + static_k, static_v: static key and value used for attention operators. + Shape: + Inputs: + - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is + the embedding dimension. + - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length. + If a ByteTensor is provided, the non-zero positions will be ignored while the zero positions + will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. + - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length. + 3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length, + S is the source sequence length. attn_mask ensures that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True`` + are not allowed to attend while ``False`` values will be unchanged. If a FloatTensor + is provided, it will be added to the attention weight. + - static_k: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length, + N is the batch size, E is the embedding dimension. E/num_heads is the head dimension. 
+ - static_v: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length, + N is the batch size, E is the embedding dimension. E/num_heads is the head dimension. + Outputs: + - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, + E is the embedding dimension. + - attn_output_weights: :math:`(N, L, S)` where N is the batch size, + L is the target sequence length, S is the source sequence length. + """ + # if not torch.jit.is_scripting(): + # tens_ops = (query, key, value, in_proj_weight, in_proj_bias, bias_k, bias_v, + # out_proj_weight, out_proj_bias) + # if any([type(t) is not Tensor for t in tens_ops]) and has_torch_function(tens_ops): + # return handle_torch_function( + # multi_head_attention_forward, tens_ops, query, key, value, + # embed_dim_to_check, num_heads, in_proj_weight, in_proj_bias, + # bias_k, bias_v, add_zero_attn, dropout_p, out_proj_weight, + # out_proj_bias, training=training, key_padding_mask=key_padding_mask, + # need_weights=need_weights, attn_mask=attn_mask, + # use_separate_proj_weight=use_separate_proj_weight, + # q_proj_weight=q_proj_weight, k_proj_weight=k_proj_weight, + # v_proj_weight=v_proj_weight, static_k=static_k, static_v=static_v) + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == embed_dim_to_check + assert key.size() == value.size() + + head_dim = embed_dim // num_heads + assert head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + scaling = float(head_dim) ** -0.5 + + if not use_separate_proj_weight: + if torch.equal(query, key) and torch.equal(key, value): + # self-attention + q, k, v = F.linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1) + + elif torch.equal(key, value): + # encoder-decoder attention + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = F.linear(query, _w, _b) + + if key is None: + assert value is None + k = None + v = None + else: + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + k, v = F.linear(key, _w, _b).chunk(2, dim=-1) + + else: + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = F.linear(query, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = embed_dim * 2 + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + k = F.linear(key, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim * 2 + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + v = F.linear(value, _w, _b) + else: + q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight) + len1, len2 = q_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == query.size(-1) + + k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight) + len1, len2 = k_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == key.size(-1) + + v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight) + len1, len2 = v_proj_weight_non_opt.size() + assert len1 == embed_dim and 
len2 == value.size(-1) + + if in_proj_bias is not None: + q = F.linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim]) + k = F.linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)]) + v = F.linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):]) + else: + q = F.linear(query, q_proj_weight_non_opt, in_proj_bias) + k = F.linear(key, k_proj_weight_non_opt, in_proj_bias) + v = F.linear(value, v_proj_weight_non_opt, in_proj_bias) + q = q * scaling + + if attn_mask is not None: + assert attn_mask.dtype == torch.float32 or attn_mask.dtype == torch.float64 or \ + attn_mask.dtype == torch.float16 or attn_mask.dtype == torch.uint8 or attn_mask.dtype == torch.bool, \ + 'Only float, byte, and bool types are supported for attn_mask, not {}'.format(attn_mask.dtype) + if attn_mask.dtype == torch.uint8: + warnings.warn("Byte tensor for attn_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.") + attn_mask = attn_mask.to(torch.bool) + + if attn_mask.dim() == 2: + attn_mask = attn_mask.unsqueeze(0) + if list(attn_mask.size()) != [1, query.size(0), key.size(0)]: + raise RuntimeError('The size of the 2D attn_mask is not correct.') + elif attn_mask.dim() == 3: + if list(attn_mask.size()) != [bsz * num_heads, query.size(0), key.size(0)]: + raise RuntimeError('The size of the 3D attn_mask is not correct.') + else: + raise RuntimeError("attn_mask's dimension {} is not supported".format(attn_mask.dim())) + # attn_mask's dim is 3 now. + + # # convert ByteTensor key_padding_mask to bool + # if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8: + # warnings.warn("Byte tensor for key_padding_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.") + # key_padding_mask = key_padding_mask.to(torch.bool) + + if bias_k is not None and bias_v is not None: + if static_k is None and static_v is None: + k = torch.cat([k, bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = pad(attn_mask, (0, 1)) + if key_padding_mask is not None: + key_padding_mask = pad(key_padding_mask, (0, 1)) + else: + assert static_k is None, "bias cannot be added to static key." + assert static_v is None, "bias cannot be added to static value." 
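+        # add_bias_kv was not used, so there are no learned key/value biases to append.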
+ else: + assert bias_k is None + assert bias_v is None + + q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1) + if k is not None: + k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + + if static_k is not None: + assert static_k.size(0) == bsz * num_heads + assert static_k.size(2) == head_dim + k = static_k + + if static_v is not None: + assert static_v.size(0) == bsz * num_heads + assert static_v.size(2) == head_dim + v = static_v + + src_len = k.size(1) + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if add_zero_attn: + src_len += 1 + k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1) + v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1) + if attn_mask is not None: + attn_mask = pad(attn_mask, (0, 1)) + if key_padding_mask is not None: + key_padding_mask = pad(key_padding_mask, (0, 1)) + + attn_output_weights = torch.bmm(q, k.transpose(1, 2)) + assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len] + + if attn_mask is not None: + if attn_mask.dtype == torch.bool: + attn_output_weights.masked_fill_(attn_mask, float('-inf')) + else: + attn_output_weights += attn_mask + + + if key_padding_mask is not None: + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) + attn_output_weights = attn_output_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float('-inf'), + ) + attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len) + + attn_output_weights = F.softmax( + attn_output_weights, dim=-1) + attn_output_weights = F.dropout(attn_output_weights, p=dropout_p, training=training) + + attn_output = torch.bmm(attn_output_weights, v) + assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim] + attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias) + + if need_weights: + # average attention weights over heads + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) + return attn_output, attn_output_weights.sum(dim=1) / num_heads + else: + return attn_output, None + +class MultiheadAttention(Module): + r"""Allows the model to jointly attend to information + from different representation subspaces. + See reference: Attention Is All You Need + .. math:: + \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O + \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V) + Args: + embed_dim: total dimension of the model. + num_heads: parallel attention heads. + dropout: a Dropout layer on attn_output_weights. Default: 0.0. + bias: add bias as module parameter. Default: True. + add_bias_kv: add bias to the key and value sequences at dim=0. + add_zero_attn: add a new batch of zeros to the key and + value sequences at dim=1. + kdim: total number of features in key. Default: None. + vdim: total number of features in value. Default: None. + Note: if kdim and vdim are None, they will be set to embed_dim such that + query, key, and value have the same number of features. 
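+        Note: this is a local copy adapted from ``torch.nn.MultiheadAttention`` in PyTorch 1.5.0
+        (see the header comment of this file).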
+ Examples:: + >>> multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) + >>> attn_output, attn_output_weights = multihead_attn(query, key, value) + """ + # __annotations__ = { + # 'bias_k': torch._jit_internal.Optional[torch.Tensor], + # 'bias_v': torch._jit_internal.Optional[torch.Tensor], + # } + __constants__ = ['q_proj_weight', 'k_proj_weight', 'v_proj_weight', 'in_proj_weight'] + + def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None): + super(MultiheadAttention, self).__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" + + if self._qkv_same_embed_dim is False: + self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) + self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim)) + self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim)) + self.register_parameter('in_proj_weight', None) + else: + self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim)) + self.register_parameter('q_proj_weight', None) + self.register_parameter('k_proj_weight', None) + self.register_parameter('v_proj_weight', None) + + if bias: + self.in_proj_bias = Parameter(torch.empty(3 * embed_dim)) + else: + self.register_parameter('in_proj_bias', None) + self.out_proj = Linear(embed_dim, embed_dim, bias=bias) + + if add_bias_kv: + self.bias_k = Parameter(torch.empty(1, 1, embed_dim)) + self.bias_v = Parameter(torch.empty(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + self._reset_parameters() + + def _reset_parameters(self): + if self._qkv_same_embed_dim: + xavier_uniform_(self.in_proj_weight) + else: + xavier_uniform_(self.q_proj_weight) + xavier_uniform_(self.k_proj_weight) + xavier_uniform_(self.v_proj_weight) + + if self.in_proj_bias is not None: + constant_(self.in_proj_bias, 0.) + constant_(self.out_proj.bias, 0.) + if self.bias_k is not None: + xavier_normal_(self.bias_k) + if self.bias_v is not None: + xavier_normal_(self.bias_v) + + def __setstate__(self, state): + # Support loading old MultiheadAttention checkpoints generated by v1.1.0 + if '_qkv_same_embed_dim' not in state: + state['_qkv_same_embed_dim'] = True + + super(MultiheadAttention, self).__setstate__(state) + + def forward(self, query, key, value, key_padding_mask=None, + need_weights=True, attn_mask=None): + # type: (Tensor, Tensor, Tensor, Optional[Tensor], bool, Optional[Tensor]) -> Tuple[Tensor, Optional[Tensor]] + r""" + Args: + query, key, value: map a query and a set of key-value pairs to an output. + See "Attention Is All You Need" for more details. + key_padding_mask: if provided, specified padding elements in the key will + be ignored by the attention. This is an binary mask. When the value is True, + the corresponding value on the attention layer will be filled with -inf. + need_weights: output attn_output_weights. + attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all + the batches while a 3D mask allows to specify a different mask for the entries of each batch. 
+ Shape: + - Inputs: + - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is + the embedding dimension. + - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length. + If a ByteTensor is provided, the non-zero positions will be ignored while the position + with the zero positions will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. + - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length. + 3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length, + S is the source sequence length. attn_mask ensure that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True`` + is not allowed to attend while ``False`` values will be unchanged. If a FloatTensor + is provided, it will be added to the attention weight. + - Outputs: + - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, + E is the embedding dimension. + - attn_output_weights: :math:`(N, L, S)` where N is the batch size, + L is the target sequence length, S is the source sequence length. + """ + if not self._qkv_same_embed_dim: + return multi_head_attention_forward( + query, key, value, self.embed_dim, self.num_heads, + self.in_proj_weight, self.in_proj_bias, + self.bias_k, self.bias_v, self.add_zero_attn, + self.dropout, self.out_proj.weight, self.out_proj.bias, + training=self.training, + key_padding_mask=key_padding_mask, need_weights=need_weights, + attn_mask=attn_mask, use_separate_proj_weight=True, + q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight, + v_proj_weight=self.v_proj_weight) + else: + return multi_head_attention_forward( + query, key, value, self.embed_dim, self.num_heads, + self.in_proj_weight, self.in_proj_bias, + self.bias_k, self.bias_v, self.add_zero_attn, + self.dropout, self.out_proj.weight, self.out_proj.bias, + training=self.training, + key_padding_mask=key_padding_mask, need_weights=need_weights, + attn_mask=attn_mask) + + +class Transformer(Module): + r"""A transformer model. User is able to modify the attributes as needed. The architecture + is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer, + Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and + Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information + Processing Systems, pages 6000-6010. Users can build the BERT(https://arxiv.org/abs/1810.04805) + model with corresponding parameters. + + Args: + d_model: the number of expected features in the encoder/decoder inputs (default=512). + nhead: the number of heads in the multiheadattention models (default=8). + num_encoder_layers: the number of sub-encoder-layers in the encoder (default=6). + num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6). + dim_feedforward: the dimension of the feedforward network model (default=2048). 
+ dropout: the dropout value (default=0.1). + activation: the activation function of encoder/decoder intermediate layer, relu or gelu (default=relu). + custom_encoder: custom encoder (default=None). + custom_decoder: custom decoder (default=None). + + Examples:: + >>> transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12) + >>> src = torch.rand((10, 32, 512)) + >>> tgt = torch.rand((20, 32, 512)) + >>> out = transformer_model(src, tgt) + + Note: A full example to apply nn.Transformer module for the word language model is available in + https://github.com/pytorch/examples/tree/master/word_language_model + """ + + def __init__(self, d_model=512, nhead=8, num_encoder_layers=6, + num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, + activation="relu", custom_encoder=None, custom_decoder=None): + super(Transformer, self).__init__() + + if custom_encoder is not None: + self.encoder = custom_encoder + else: + encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation) + encoder_norm = LayerNorm(d_model) + self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) + + if custom_decoder is not None: + self.decoder = custom_decoder + else: + decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation) + decoder_norm = LayerNorm(d_model) + self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm) + + self._reset_parameters() + + self.d_model = d_model + self.nhead = nhead + + def forward(self, src, tgt, src_mask=None, tgt_mask=None, + memory_mask=None, src_key_padding_mask=None, + tgt_key_padding_mask=None, memory_key_padding_mask=None): + # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor # noqa + r"""Take in and process masked source/target sequences. + + Args: + src: the sequence to the encoder (required). + tgt: the sequence to the decoder (required). + src_mask: the additive mask for the src sequence (optional). + tgt_mask: the additive mask for the tgt sequence (optional). + memory_mask: the additive mask for the encoder output (optional). + src_key_padding_mask: the ByteTensor mask for src keys per batch (optional). + tgt_key_padding_mask: the ByteTensor mask for tgt keys per batch (optional). + memory_key_padding_mask: the ByteTensor mask for memory keys per batch (optional). + + Shape: + - src: :math:`(S, N, E)`. + - tgt: :math:`(T, N, E)`. + - src_mask: :math:`(S, S)`. + - tgt_mask: :math:`(T, T)`. + - memory_mask: :math:`(T, S)`. + - src_key_padding_mask: :math:`(N, S)`. + - tgt_key_padding_mask: :math:`(N, T)`. + - memory_key_padding_mask: :math:`(N, S)`. + + Note: [src/tgt/memory]_mask ensures that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True`` + are not allowed to attend while ``False`` values will be unchanged. If a FloatTensor + is provided, it will be added to the attention weight. + [src/tgt/memory]_key_padding_mask provides specified elements in the key to be ignored by + the attention. If a ByteTensor is provided, the non-zero positions will be ignored while the zero + positions will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. 
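+            For example (illustrative), an additive causal mask for the target sequence can be
+            obtained from :meth:`generate_square_subsequent_mask`::
+                >>> tgt_mask = transformer_model.generate_square_subsequent_mask(tgt.size(0))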
+ + - output: :math:`(T, N, E)`. + + Note: Due to the multi-head attention architecture in the transformer model, + the output sequence length of a transformer is same as the input sequence + (i.e. target) length of the decode. + + where S is the source sequence length, T is the target sequence length, N is the + batch size, E is the feature number + + Examples: + >>> output = transformer_model(src, tgt, src_mask=src_mask, tgt_mask=tgt_mask) + """ + + if src.size(1) != tgt.size(1): + raise RuntimeError("the batch number of src and tgt must be equal") + + if src.size(2) != self.d_model or tgt.size(2) != self.d_model: + raise RuntimeError("the feature number of src and tgt must be equal to d_model") + + memory = self.encoder(src, mask=src_mask, src_key_padding_mask=src_key_padding_mask) + output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask, + tgt_key_padding_mask=tgt_key_padding_mask, + memory_key_padding_mask=memory_key_padding_mask) + return output + + def generate_square_subsequent_mask(self, sz): + r"""Generate a square mask for the sequence. The masked positions are filled with float('-inf'). + Unmasked positions are filled with float(0.0). + """ + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + return mask + + def _reset_parameters(self): + r"""Initiate parameters in the transformer model.""" + + for p in self.parameters(): + if p.dim() > 1: + xavier_uniform_(p) + + +class TransformerEncoder(Module): + r"""TransformerEncoder is a stack of N encoder layers + + Args: + encoder_layer: an instance of the TransformerEncoderLayer() class (required). + num_layers: the number of sub-encoder-layers in the encoder (required). + norm: the layer normalization component (optional). + + Examples:: + >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8) + >>> transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6) + >>> src = torch.rand(10, 32, 512) + >>> out = transformer_encoder(src) + """ + __constants__ = ['norm'] + + def __init__(self, encoder_layer, num_layers, norm=None): + super(TransformerEncoder, self).__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + + def forward(self, src, mask=None, src_key_padding_mask=None): + # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the input through the encoder layers in turn. + + Args: + src: the sequence to the encoder (required). + mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + output = src + + for i, mod in enumerate(self.layers): + output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask) + + if self.norm is not None: + output = self.norm(output) + + return output + + +class TransformerDecoder(Module): + r"""TransformerDecoder is a stack of N decoder layers + + Args: + decoder_layer: an instance of the TransformerDecoderLayer() class (required). + num_layers: the number of sub-decoder-layers in the decoder (required). + norm: the layer normalization component (optional). 
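+        Note: unlike the stock ``nn.TransformerDecoder``, this variant also accepts an optional
+        second memory stream (``memory2`` together with ``memory_mask2`` and
+        ``memory_key_padding_mask2``) that is forwarded to every decoder layer, e.g.::
+            >>> out = transformer_decoder(tgt, memory, memory2=second_memory)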
+ + Examples:: + >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8) + >>> transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6) + >>> memory = torch.rand(10, 32, 512) + >>> tgt = torch.rand(20, 32, 512) + >>> out = transformer_decoder(tgt, memory) + """ + __constants__ = ['norm'] + + def __init__(self, decoder_layer, num_layers, norm=None): + super(TransformerDecoder, self).__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + + def forward(self, tgt, memory, memory2=None, tgt_mask=None, + memory_mask=None, memory_mask2=None, tgt_key_padding_mask=None, + memory_key_padding_mask=None, memory_key_padding_mask2=None): + # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the inputs (and mask) through the decoder layer in turn. + + Args: + tgt: the sequence to the decoder (required). + memory: the sequence from the last layer of the encoder (required). + tgt_mask: the mask for the tgt sequence (optional). + memory_mask: the mask for the memory sequence (optional). + tgt_key_padding_mask: the mask for the tgt keys per batch (optional). + memory_key_padding_mask: the mask for the memory keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + output = tgt + + for mod in self.layers: + output = mod(output, memory, memory2=memory2, tgt_mask=tgt_mask, + memory_mask=memory_mask, memory_mask2=memory_mask2, + tgt_key_padding_mask=tgt_key_padding_mask, + memory_key_padding_mask=memory_key_padding_mask, + memory_key_padding_mask2=memory_key_padding_mask2) + + if self.norm is not None: + output = self.norm(output) + + return output + +class TransformerEncoderLayer(Module): + r"""TransformerEncoderLayer is made up of self-attn and feedforward network. + This standard encoder layer is based on the paper "Attention Is All You Need". + Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, + Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in + Neural Information Processing Systems, pages 6000-6010. Users may modify or implement + in a different way during application. + + Args: + d_model: the number of expected features in the input (required). + nhead: the number of heads in the multiheadattention models (required). + dim_feedforward: the dimension of the feedforward network model (default=2048). + dropout: the dropout value (default=0.1). + activation: the activation function of intermediate layer, relu or gelu (default=relu). 
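+        Note: the layer uses the post-norm ("Add & Norm") ordering of the original paper, i.e.
+        ``src = norm1(src + dropout(self_attn(src)))`` followed by
+        ``src = norm2(src + dropout(feedforward(src)))``.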
+ + Examples:: + >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8) + >>> src = torch.rand(10, 32, 512) + >>> out = encoder_layer(src) + """ + + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, + activation="relu", debug=False): + super(TransformerEncoderLayer, self).__init__() + self.debug = debug + self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + self.linear1 = Linear(d_model, dim_feedforward) + self.dropout = Dropout(dropout) + self.linear2 = Linear(dim_feedforward, d_model) + + self.norm1 = LayerNorm(d_model) + self.norm2 = LayerNorm(d_model) + self.dropout1 = Dropout(dropout) + self.dropout2 = Dropout(dropout) + + self.activation = _get_activation_fn(activation) + + def __setstate__(self, state): + if 'activation' not in state: + state['activation'] = F.relu + super(TransformerEncoderLayer, self).__setstate__(state) + + def forward(self, src, src_mask=None, src_key_padding_mask=None): + # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the input through the encoder layer. + + Args: + src: the sequence to the encoder layer (required). + src_mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + src2, attn = self.self_attn(src, src, src, attn_mask=src_mask, + key_padding_mask=src_key_padding_mask) + if self.debug: self.attn = attn + src = src + self.dropout1(src2) + src = self.norm1(src) + src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) + src = src + self.dropout2(src2) + src = self.norm2(src) + + return src + + +class TransformerDecoderLayer(Module): + r"""TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network. + This standard decoder layer is based on the paper "Attention Is All You Need". + Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, + Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in + Neural Information Processing Systems, pages 6000-6010. Users may modify or implement + in a different way during application. + + Args: + d_model: the number of expected features in the input (required). + nhead: the number of heads in the multiheadattention models (required). + dim_feedforward: the dimension of the feedforward network model (default=2048). + dropout: the dropout value (default=0.1). + activation: the activation function of intermediate layer, relu or gelu (default=relu). 
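+        Note: this implementation additionally exposes the constructor flags ``self_attn``
+        (set it to False to drop the self-attention block), ``siamese`` (adds a second
+        cross-attention over ``memory2``) and ``debug`` (keeps the attention maps on the
+        module); see ``__init__`` below.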
+ + Examples:: + >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8) + >>> memory = torch.rand(10, 32, 512) + >>> tgt = torch.rand(20, 32, 512) + >>> out = decoder_layer(tgt, memory) + """ + + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, + activation="relu", self_attn=True, siamese=False, debug=False): + super(TransformerDecoderLayer, self).__init__() + self.has_self_attn, self.siamese = self_attn, siamese + self.debug = debug + if self.has_self_attn: + self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout) + self.norm1 = LayerNorm(d_model) + self.dropout1 = Dropout(dropout) + self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + self.linear1 = Linear(d_model, dim_feedforward) + self.dropout = Dropout(dropout) + self.linear2 = Linear(dim_feedforward, d_model) + + self.norm2 = LayerNorm(d_model) + self.norm3 = LayerNorm(d_model) + self.dropout2 = Dropout(dropout) + self.dropout3 = Dropout(dropout) + if self.siamese: + self.multihead_attn2 = MultiheadAttention(d_model, nhead, dropout=dropout) + + self.activation = _get_activation_fn(activation) + + def __setstate__(self, state): + if 'activation' not in state: + state['activation'] = F.relu + super(TransformerDecoderLayer, self).__setstate__(state) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, + tgt_key_padding_mask=None, memory_key_padding_mask=None, + memory2=None, memory_mask2=None, memory_key_padding_mask2=None): + # type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor + r"""Pass the inputs (and mask) through the decoder layer. + + Args: + tgt: the sequence to the decoder layer (required). + memory: the sequence from the last layer of the encoder (required). + tgt_mask: the mask for the tgt sequence (optional). + memory_mask: the mask for the memory sequence (optional). + tgt_key_padding_mask: the mask for the tgt keys per batch (optional). + memory_key_padding_mask: the mask for the memory keys per batch (optional). + + Shape: + see the docs in Transformer class. + """ + if self.has_self_attn: + tgt2, attn = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask, + key_padding_mask=tgt_key_padding_mask) + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + if self.debug: self.attn = attn + tgt2, attn2 = self.multihead_attn(tgt, memory, memory, attn_mask=memory_mask, + key_padding_mask=memory_key_padding_mask) + if self.debug: self.attn2 = attn2 + + if self.siamese: + tgt3, attn3 = self.multihead_attn2(tgt, memory2, memory2, attn_mask=memory_mask2, + key_padding_mask=memory_key_padding_mask2) + tgt = tgt + self.dropout2(tgt3) + if self.debug: self.attn3 = attn3 + + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = tgt + self.dropout3(tgt2) + tgt = self.norm3(tgt) + + return tgt + + +def _get_clones(module, N): + return ModuleList([copy.deepcopy(module) for i in range(N)]) + + +def _get_activation_fn(activation): + if activation == "relu": + return F.relu + elif activation == "gelu": + return F.gelu + + raise RuntimeError("activation should be relu/gelu, not {}".format(activation)) + + +class PositionalEncoding(nn.Module): + r"""Inject some information about the relative or absolute position of the tokens + in the sequence. The positional encodings have the same dimension as + the embeddings, so that the two can be summed. 
Here, we use sine and cosine + functions of different frequencies. + .. math:: + \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) + \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) + \text{where pos is the word position and i is the embed idx) + Args: + d_model: the embed dim (required). + dropout: the dropout value (default=0.1). + max_len: the max. length of the incoming sequence (default=5000). + Examples: + >>> pos_encoder = PositionalEncoding(d_model) + """ + + def __init__(self, d_model, dropout=0.1, max_len=5000): + super(PositionalEncoding, self).__init__() + self.dropout = nn.Dropout(p=dropout) + + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + r"""Inputs of forward function + Args: + x: the sequence fed to the positional encoder model (required). + Shape: + x: [sequence length, batch size, embed dim] + output: [sequence length, batch size, embed dim] + Examples: + >>> output = pos_encoder(x) + """ + + x = x + self.pe[:x.size(0), :] + return self.dropout(x) + + +if __name__ == '__main__': + transformer_model = Transformer(nhead=16, num_encoder_layers=12) + src = torch.rand((10, 32, 512)) + tgt = torch.rand((20, 32, 512)) + out = transformer_model(src, tgt) + print(out) diff --git a/src/sts/projects/SWINTS/swints/util/__init__.py b/src/sts/projects/SWINTS/swints/util/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..168f9979a4623806934b0ff1102ac166704e7dec --- /dev/null +++ b/src/sts/projects/SWINTS/swints/util/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/src/sts/projects/SWINTS/swints/util/__pycache__/__init__.cpython-38.pyc b/src/sts/projects/SWINTS/swints/util/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d5b5525e4970af7298794c3171e993dc694c7be Binary files /dev/null and b/src/sts/projects/SWINTS/swints/util/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/util/__pycache__/box_ops.cpython-38.pyc b/src/sts/projects/SWINTS/swints/util/__pycache__/box_ops.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92515a5774167b416986955e87bdd252d00076b6 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/util/__pycache__/box_ops.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/util/__pycache__/misc.cpython-38.pyc b/src/sts/projects/SWINTS/swints/util/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4206ec5bd00b842df698b77aaacc4c3e48e6fff6 Binary files /dev/null and b/src/sts/projects/SWINTS/swints/util/__pycache__/misc.cpython-38.pyc differ diff --git a/src/sts/projects/SWINTS/swints/util/box_ops.py b/src/sts/projects/SWINTS/swints/util/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..9c088e5bacc88ff7217fc971f5db889f5bb45b39 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/util/box_ops.py @@ -0,0 +1,88 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Utilities for bounding box manipulation and GIoU. 
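+
+Illustrative usage (boxes given in (x0, y0, x1, y1) format)::
+    >>> import torch
+    >>> boxes1 = torch.tensor([[0., 0., 2., 2.]])
+    >>> boxes2 = torch.tensor([[1., 1., 3., 3.]])
+    >>> giou = generalized_box_iou(boxes1, boxes2)   # [1, 1] matrix of pairwise GIoU values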
+""" +import torch +from torchvision.ops.boxes import box_area + + +def box_cxcywh_to_xyxy(x): + x_c, y_c, w, h = x.unbind(-1) + b = [(x_c - 0.5 * w), (y_c - 0.5 * h), + (x_c + 0.5 * w), (y_c + 0.5 * h)] + return torch.stack(b, dim=-1) + + +def box_xyxy_to_cxcywh(x): + x0, y0, x1, y1 = x.unbind(-1) + b = [(x0 + x1) / 2, (y0 + y1) / 2, + (x1 - x0), (y1 - y0)] + return torch.stack(b, dim=-1) + + +# modified from torchvision to also return the union +def box_iou(boxes1, boxes2): + area1 = box_area(boxes1) + area2 = box_area(boxes2) + + lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] + rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] + + wh = (rb - lt).clamp(min=0) # [N,M,2] + inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] + + union = area1[:, None] + area2 - inter + + iou = inter / union + return iou, union + + +def generalized_box_iou(boxes1, boxes2): + """ + Generalized IoU from https://giou.stanford.edu/ + + The boxes should be in [x0, y0, x1, y1] format + + Returns a [N, M] pairwise matrix, where N = len(boxes1) + and M = len(boxes2) + """ + # degenerate boxes gives inf / nan results + # so do an early check + assert (boxes1[:, 2:] >= boxes1[:, :2]).all() + assert (boxes2[:, 2:] >= boxes2[:, :2]).all() + iou, union = box_iou(boxes1, boxes2) + + lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + + wh = (rb - lt).clamp(min=0) # [N,M,2] + area = wh[:, :, 0] * wh[:, :, 1] + + return iou - (area - union) / area + + +def masks_to_boxes(masks): + """Compute the bounding boxes around the provided masks + + The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. + + Returns a [N, 4] tensors, with the boxes in xyxy format + """ + if masks.numel() == 0: + return torch.zeros((0, 4), device=masks.device) + + h, w = masks.shape[-2:] + + y = torch.arange(0, h, dtype=torch.float) + x = torch.arange(0, w, dtype=torch.float) + y, x = torch.meshgrid(y, x) + + x_mask = (masks * x.unsqueeze(0)) + x_max = x_mask.flatten(1).max(-1)[0] + x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] + + y_mask = (masks * y.unsqueeze(0)) + y_max = y_mask.flatten(1).max(-1)[0] + y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] + + return torch.stack([x_min, y_min, x_max, y_max], 1) diff --git a/src/sts/projects/SWINTS/swints/util/colormap.py b/src/sts/projects/SWINTS/swints/util/colormap.py new file mode 100644 index 0000000000000000000000000000000000000000..de6c8a15f52ff5f37885b24d6603982c3de95db6 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/util/colormap.py @@ -0,0 +1,178 @@ +import numpy as np + + +def colormap(rgb=False): + color_list = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 
0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.167, 0.000, 0.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.286, 0.286, 0.286, + 0.429, 0.429, 0.429, + 0.571, 0.571, 0.571, + 0.714, 0.714, 0.714, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] + ).astype(np.float32) + color_list = color_list.reshape((-1, 3)) * 255 + if not rgb: + color_list = color_list[:, ::-1] + return color_list + + +def category(): + + category = [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush"] + + return category \ No newline at end of file diff --git a/src/sts/projects/SWINTS/swints/util/misc.py b/src/sts/projects/SWINTS/swints/util/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..edf769cf91654e2bb95993388b2029b8ed975eb6 --- /dev/null +++ b/src/sts/projects/SWINTS/swints/util/misc.py @@ -0,0 +1,467 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. +""" +import os +import subprocess +import time +from collections import defaultdict, deque +import datetime +import pickle +from typing import Optional, List + +import torch +import torch.distributed as dist +from torch import Tensor + +# needed due to empty tensor bug in pytorch and torchvision 0.5 +import torchvision +# if float(torchvision.__version__[:3]) < 0.7: +# # from torchvision.ops import _new_empty_tensor +# from torchvision.ops.misc import _output_size + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. 
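+
+    Illustrative usage::
+        >>> meter = SmoothedValue(window_size=20, fmt='{avg:.4f}')
+        >>> meter.update(0.5); meter.update(0.7)
+        >>> print(meter)   # windowed average, here 0.6000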
+ """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! + """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
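+
+    Illustrative usage (loss values as 0-dim tensors)::
+        >>> loss_dict = {'loss_cls': torch.tensor(0.5), 'loss_box': torch.tensor(0.2)}
+        >>> reduce_dict(loss_dict)   # values averaged over all ranks (no-op when world_size == 1)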
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def get_sha(): + cwd = os.path.dirname(os.path.abspath(__file__)) + + def _run(command): + return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() + sha = 'N/A' + diff = "clean" + branch = 'N/A' + try: + sha = _run(['git', 'rev-parse', 'HEAD']) + subprocess.check_output(['git', 'diff'], cwd=cwd) + diff = _run(['git', 'diff-index', 'HEAD']) + diff = "has uncommited changes" if diff else "clean" + branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) + except Exception: + pass + message = f"sha: {sha}, status: {diff}, branch: {branch}" + return message + + +def collate_fn(batch): + batch = 
list(zip(*batch)) + batch[0] = nested_tensor_from_tensor_list(batch[0]) + return tuple(batch) + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor]): + self.tensors = tensors + self.mask = mask + + def to(self, device): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + if torchvision._is_tracing(): + # nested_tensor_from_tensor_list() does not export well to ONNX + # call _onnx_nested_tensor_from_tensor_list() instead + return _onnx_nested_tensor_from_tensor_list(tensor_list) + + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], :img.shape[2]] = False + else: + raise ValueError('not supported') + return NestedTensor(tensor, mask) + + +# _onnx_nested_tensor_from_tensor_list() is an implementation of +# nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
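+# (That tracing-friendly variant is selected automatically inside
+# nested_tensor_from_tensor_list() whenever torchvision._is_tracing() is True,
+# i.e. when the model is being exported to ONNX.)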
+@torch.jit.unused +def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: + max_size = [] + for i in range(tensor_list[0].dim()): + max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) + max_size.append(max_size_i) + max_size = tuple(max_size) + + # work around for + # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + # m[: img.shape[1], :img.shape[2]] = False + # which is not yet supported in onnx + padded_imgs = [] + padded_masks = [] + for img in tensor_list: + padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] + padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) + padded_imgs.append(padded_img) + + m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) + padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) + padded_masks.append(padded_mask.to(torch.bool)) + + tensor = torch.stack(padded_imgs) + mask = torch.stack(padded_masks) + + return NestedTensor(tensor, mask=mask) + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) + + +@torch.no_grad() +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + if target.numel() == 0: + return [torch.zeros([], device=output.device)] + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): + # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor + """ + Equivalent to nn.functional.interpolate, but with support for empty 
batch sizes. + This will eventually be supported natively by PyTorch, and this + class can go away. + """ + # if float(torchvision.__version__[:3]) < 0.7: + # if input.numel() > 0: + # return torch.nn.functional.interpolate( + # input, size, scale_factor, mode, align_corners + # ) + + # output_shape = _output_size(2, input, size, scale_factor) + # output_shape = list(input.shape[:-2]) + list(output_shape) + # return _new_empty_tensor(input, output_shape) + # else: + return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) diff --git a/src/sts/projects/SWINTS/swints/util/plot_utils.py b/src/sts/projects/SWINTS/swints/util/plot_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0f24bed0d3fe4624aeb231ddd02633f2e58e4bff --- /dev/null +++ b/src/sts/projects/SWINTS/swints/util/plot_utils.py @@ -0,0 +1,107 @@ +""" +Plotting utilities to visualize training logs. +""" +import torch +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt + +from pathlib import Path, PurePath + + +def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col=0, log_name='log.txt'): + ''' + Function to plot specific fields from training log(s). Plots both training and test results. + + :: Inputs - logs = list containing Path objects, each pointing to individual dir with a log file + - fields = which results to plot from each log file - plots both training and test for each field. + - ewm_col = optional, which column to use as the exponential weighted smoothing of the plots + - log_name = optional, name of log file if different than default 'log.txt'. + + :: Outputs - matplotlib plots of results in fields, color coded for each log file. + - solid lines are training results, dashed lines are test results. + + ''' + func_name = "plot_utils.py::plot_logs" + + # verify logs is a list of Paths (list[Paths]) or single Pathlib object Path, + # convert single Path to list to avoid 'not iterable' error + + if not isinstance(logs, list): + if isinstance(logs, PurePath): + logs = [logs] + print(f"{func_name} info: logs param expects a list argument, converted to list[Path].") + else: + raise ValueError(f"{func_name} - invalid argument for logs parameter.\n \ + Expect list[Path] or single Path obj, received {type(logs)}") + + # Quality checks - verify valid dir(s), that every item in list is Path object, and that log_name exists in each dir + for i, dir in enumerate(logs): + if not isinstance(dir, PurePath): + raise ValueError(f"{func_name} - non-Path object in logs argument of {type(dir)}: \n{dir}") + if not dir.exists(): + raise ValueError(f"{func_name} - invalid directory in logs argument:\n{dir}") + # verify log_name exists + fn = Path(dir / log_name) + if not fn.exists(): + print(f"-> missing {log_name}. 
Have you gotten to Epoch 1 in training?") + print(f"--> full path of missing log file: {fn}") + return + + # load log file(s) and plot + dfs = [pd.read_json(Path(p) / log_name, lines=True) for p in logs] + + fig, axs = plt.subplots(ncols=len(fields), figsize=(16, 5)) + + for df, color in zip(dfs, sns.color_palette(n_colors=len(logs))): + for j, field in enumerate(fields): + if field == 'mAP': + coco_eval = pd.DataFrame( + np.stack(df.test_coco_eval_bbox.dropna().values)[:, 1] + ).ewm(com=ewm_col).mean() + axs[j].plot(coco_eval, c=color) + else: + df.interpolate().ewm(com=ewm_col).mean().plot( + y=[f'train_{field}', f'test_{field}'], + ax=axs[j], + color=[color] * 2, + style=['-', '--'] + ) + for ax, field in zip(axs, fields): + ax.legend([Path(p).name for p in logs]) + ax.set_title(field) + + +def plot_precision_recall(files, naming_scheme='iter'): + if naming_scheme == 'exp_id': + # name becomes exp_id + names = [f.parts[-3] for f in files] + elif naming_scheme == 'iter': + names = [f.stem for f in files] + else: + raise ValueError(f'not supported {naming_scheme}') + fig, axs = plt.subplots(ncols=2, figsize=(16, 5)) + for f, color, name in zip(files, sns.color_palette("Blues", n_colors=len(files)), names): + data = torch.load(f) + # precision is n_iou, n_points, n_cat, n_area, max_det + precision = data['precision'] + recall = data['params'].recThrs + scores = data['scores'] + # take precision for all classes, all areas and 100 detections + precision = precision[0, :, :, 0, -1].mean(1) + scores = scores[0, :, :, 0, -1].mean(1) + prec = precision.mean() + rec = data['recall'][0, :, 0, -1].mean() + print(f'{naming_scheme} {name}: mAP@50={prec * 100: 05.1f}, ' + + f'score={scores.mean():0.3f}, ' + + f'f1={2 * prec * rec / (prec + rec + 1e-8):0.3f}' + ) + axs[0].plot(recall, precision, c=color) + axs[1].plot(recall, scores, c=color) + + axs[0].set_title('Precision / Recall') + axs[0].legend(names) + axs[1].set_title('Scores / Recall') + axs[1].legend(names) + return fig, axs diff --git a/src/sts/projects/SWINTS/train_net.py b/src/sts/projects/SWINTS/train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c1752862d3e4dd9beac083f3bf6c35de34f6d5 --- /dev/null +++ b/src/sts/projects/SWINTS/train_net.py @@ -0,0 +1,141 @@ +# +# Modified by Peize Sun, Rufeng Zhang +# Contact: {sunpeize, cxrfzhang}@foxmail.com +# +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +SparseRCNN Training Script. + +This script is a simplified version of the training script in detectron2/tools. +""" + +import os +import itertools +import time +from typing import Any, Dict, List, Set + +import torch + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import MetadataCatalog, build_detection_train_loader +from detectron2.engine import AutogradProfiler, DefaultTrainer, default_argument_parser, default_setup, launch +from detectron2.evaluation import COCOEvaluator, verify_results, TextEvaluator +from detectron2.solver.build import maybe_add_gradient_clipping + +from swints import SWINTSDatasetMapper, add_SWINTS_config + + +class Trainer(DefaultTrainer): +# """ +# Extension of the Trainer class adapted to SparseRCNN. +# """ + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. 
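+        Here it always returns a ``TextEvaluator`` that writes to ``cfg.OUTPUT_DIR/inference``
+        unless an explicit ``output_folder`` is given.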
+ For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + return TextEvaluator(dataset_name, cfg, True, output_folder) + + @classmethod + def build_train_loader(cls, cfg): + mapper = SWINTSDatasetMapper(cfg, is_train=True) + return build_detection_train_loader(cfg, mapper=mapper) + + @classmethod + def build_optimizer(cls, cfg, model): + params: List[Dict[str, Any]] = [] + memo: Set[torch.nn.parameter.Parameter] = set() + for key, value in model.named_parameters(recurse=True): + if not value.requires_grad: + continue + # Avoid duplicating parameters + if value in memo: + continue + memo.add(value) + lr = cfg.SOLVER.BASE_LR + weight_decay = cfg.SOLVER.WEIGHT_DECAY + if "backbone" in key: + lr = lr * cfg.SOLVER.BACKBONE_MULTIPLIER + params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] + + def maybe_add_full_model_gradient_clipping(optim): # optim: the optimizer class + # detectron2 doesn't have full model gradient clipping now + clip_norm_val = cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE + enable = ( + cfg.SOLVER.CLIP_GRADIENTS.ENABLED + and cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model" + and clip_norm_val > 0.0 + ) + + class FullModelGradientClippingOptimizer(optim): + def step(self, closure=None): + all_params = itertools.chain(*[x["params"] for x in self.param_groups]) + torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val) + super().step(closure=closure) + + return FullModelGradientClippingOptimizer if enable else optim + + optimizer_type = cfg.SOLVER.OPTIMIZER + if optimizer_type == "SGD": + optimizer = maybe_add_full_model_gradient_clipping(torch.optim.SGD)( + params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM + ) + elif optimizer_type == "ADAMW": + optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)( + params, cfg.SOLVER.BASE_LR + ) + else: + raise NotImplementedError(f"no optimizer type {optimizer_type}") + if not cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model": + optimizer = maybe_add_gradient_clipping(cfg, optimizer) + return optimizer + + +def setup(args): + """ + Create configs and perform basic setups. 
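+    The config is assembled from the SWINTS defaults (``add_SWINTS_config``), the YAML file
+    passed as ``--config-file`` and any ``KEY VALUE`` overrides in ``--opts``, then frozen.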
+ """ + cfg = get_cfg() + add_SWINTS_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + cfg = setup(args) + + if args.eval_only: + model = Trainer.build_model(cfg) + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume) + res = Trainer.test(cfg, model) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/src/sts/run.sh b/src/sts/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..57e85af044ee59a6ce65141031a06c29a2a06d4f --- /dev/null +++ b/src/sts/run.sh @@ -0,0 +1,12 @@ +#swintext +# Download model and save to checkpoints/ +#setup +cd SwinTextSpotter +conda create -n SWINTS python=3.8 -y +conda activate SWINTS +conda install pytorch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge +pip install opencv-python scipy shapely rapidfuzz timm Polygon3 +python setup.py build develop + +#run predict +python demo/merge.py --config-file projects/SWINTS/configs/SWINTS-swin-finetune-vintext.yaml --input /image --inputfile /input/sign --output ../output/output_merge/ --opts MODEL.WEIGHTS checkpoints/model_final.pth \ No newline at end of file diff --git a/src/sts/setup.cfg b/src/sts/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..943d3e04a0c7c9caf7ead5c406904785f601eea9 --- /dev/null +++ b/src/sts/setup.cfg @@ -0,0 +1,26 @@ +[isort] +line_length=100 +multi_line_output=3 +include_trailing_comma=True +known_standard_library=numpy,setuptools,mock +skip=./datasets,docs +skip_glob=*/__init__.py,**/configs/** +known_myself=detectron2 +known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx,panopticapi,black,isort,av,iopath,omegaconf,hydra,yaml,pydoc,submitit,cloudpickle +no_lines_before=STDLIB,THIRDPARTY +sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER +default_section=FIRSTPARTY + +[mypy] +python_version=3.6 +ignore_missing_imports = True +warn_unused_configs = True +disallow_untyped_defs = True +check_untyped_defs = True +warn_unused_ignores = True +warn_redundant_casts = True +show_column_numbers = True +follow_imports = silent +allow_redefinition = True +; Require all functions to be annotated +disallow_incomplete_defs = True diff --git a/src/sts/setup.py b/src/sts/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..8a5911334d136f52dabd680ff74189987d5d9656 --- /dev/null +++ b/src/sts/setup.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. 
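+# Typical local build (see src/sts/run.sh): python setup.py build develop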
+# import sys +# sys.path.insert(0, '../Pipeline_POI_Engineering/src/sts') +import glob +import os +import shutil +from os import path +from setuptools import find_packages, setup +from typing import List +import torch +from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension +from torch.utils.hipify import hipify_python + +torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] +assert torch_ver >= [1, 6], "Requires PyTorch >= 1.6" + + +def get_version(): + init_py_path = path.join(path.abspath(path.dirname(__file__)), "detectron2", "__init__.py") + init_py = open(init_py_path, "r").readlines() + version_line = [l.strip() for l in init_py if l.startswith("__version__")][0] + version = version_line.split("=")[-1].strip().strip("'\"") + + # The following is used to build release packages. + # Users should never use it. + suffix = os.getenv("D2_VERSION_SUFFIX", "") + version = version + suffix + if os.getenv("BUILD_NIGHTLY", "0") == "1": + from datetime import datetime + + date_str = datetime.today().strftime("%y%m%d") + version = version + ".dev" + date_str + + new_init_py = [l for l in init_py if not l.startswith("__version__")] + new_init_py.append('__version__ = "{}"\n'.format(version)) + with open(init_py_path, "w") as f: + f.write("".join(new_init_py)) + return version + + +def get_extensions(): + this_dir = path.dirname(path.abspath(__file__)) + extensions_dir = path.join(this_dir, "detectron2", "layers", "csrc") + + main_source = path.join(extensions_dir, "vision.cpp") + sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) + + from torch.utils.cpp_extension import ROCM_HOME + + is_rocm_pytorch = ( + True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False + ) + + hipify_ver = ( + [int(x) for x in torch.utils.hipify.__version__.split(".")] + if hasattr(torch.utils.hipify, "__version__") + else [0, 0, 0] + ) + + if is_rocm_pytorch and hipify_ver < [1, 0, 0]: # TODO not needed since pt1.8 + + # Earlier versions of hipification and extension modules were not + # transparent, i.e. would require an explicit call to hipify, and the + # hipification would introduce "hip" subdirectories, possibly changing + # the relationship between source and header files. + # This path is maintained for backwards compatibility. + + hipify_python.hipify( + project_directory=this_dir, + output_directory=this_dir, + includes="detectron2/layers/csrc/*", + show_detailed=True, + is_pytorch_extension=True, + ) + + source_cuda = glob.glob(path.join(extensions_dir, "**", "hip", "*.hip")) + glob.glob( + path.join(extensions_dir, "hip", "*.hip") + ) + + shutil.copy( + "detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h", + "detectron2/layers/csrc/box_iou_rotated/hip/box_iou_rotated_utils.h", + ) + shutil.copy( + "detectron2/layers/csrc/deformable/deform_conv.h", + "detectron2/layers/csrc/deformable/hip/deform_conv.h", + ) + + sources = [main_source] + sources + sources = [ + s + for s in sources + if not is_rocm_pytorch or torch_ver < [1, 7] or not s.endswith("hip/vision.cpp") + ] + + else: + + # common code between cuda and rocm platforms, + # for hipify version [1,0,0] and later. 
+ + source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu")) + glob.glob( + path.join(extensions_dir, "*.cu") + ) + + sources = [main_source] + sources + + extension = CppExtension + + extra_compile_args = {"cxx": []} + define_macros = [] + + if (torch.cuda.is_available() and ((CUDA_HOME is not None) or is_rocm_pytorch)) or os.getenv( + "FORCE_CUDA", "0" + ) == "1": + extension = CUDAExtension + sources += source_cuda + + if not is_rocm_pytorch: + define_macros += [("WITH_CUDA", None)] + extra_compile_args["nvcc"] = [ + "-O3", + "-DCUDA_HAS_FP16=1", + "-D__CUDA_NO_HALF_OPERATORS__", + "-D__CUDA_NO_HALF_CONVERSIONS__", + "-D__CUDA_NO_HALF2_OPERATORS__", + ] + else: + define_macros += [("WITH_HIP", None)] + extra_compile_args["nvcc"] = [] + + if torch_ver < [1, 7]: + # supported by https://github.com/pytorch/pytorch/pull/43931 + CC = os.environ.get("CC", None) + if CC is not None: + extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) + + include_dirs = [extensions_dir] + + ext_modules = [ + extension( + "detectron2._C", + sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + ] + + return ext_modules + + +def get_model_zoo_configs() -> List[str]: + """ + Return a list of configs to include in package for model zoo. Copy over these configs inside + detectron2/model_zoo. + """ + + # Use absolute paths while symlinking. + source_configs_dir = path.join(path.dirname(path.realpath(__file__)), "configs") + destination = path.join( + path.dirname(path.realpath(__file__)), "detectron2", "model_zoo", "configs" + ) + # Symlink the config directory inside package to have a cleaner pip install. + + # Remove stale symlink/directory from a previous build. + if path.exists(source_configs_dir): + if path.islink(destination): + os.unlink(destination) + elif path.isdir(destination): + shutil.rmtree(destination) + + if not path.exists(destination): + try: + os.symlink(source_configs_dir, destination) + except OSError: + # Fall back to copying if symlink fails: ex. on Windows. + shutil.copytree(source_configs_dir, destination) + + config_paths = glob.glob("configs/**/*.yaml", recursive=True) + return config_paths + + +# For projects that are relative small and provide features that are very close +# to detectron2's core functionalities, we install them under detectron2.projects +PROJECTS = { + "detectron2.projects.SWINTS": "projects/SWINTS/swints", +} + +setup( + name="detectron2", + version=get_version(), + author="FAIR", + url="https://github.com/facebookresearch/detectron2", + description="Detectron2 is FAIR's next-generation research " + "platform for object detection and segmentation.", + packages=find_packages(exclude=("configs", "tests*")) + list(PROJECTS.keys()), + package_dir=PROJECTS, + package_data={"detectron2.model_zoo": get_model_zoo_configs()}, + python_requires=">=3.6", + install_requires=[ + # Do not add opencv here. Just like pytorch, user should install + # opencv themselves, preferrably by OS's package manager, or by + # choosing the proper pypi package name at https://github.com/skvark/opencv-python + "termcolor>=1.1", + "Pillow>=7.1", # or use pillow-simd for better performance + "yacs>=0.1.6", + "tabulate", + "cloudpickle", + "matplotlib", + "tqdm>4.29.0", + "tensorboard", + # Lock version of fvcore/iopath because they may have breaking changes + # NOTE: when updating fvcore/iopath version, make sure fvcore depends + # on the same version of iopath. 
+ "fvcore>=0.1.5,<0.1.6", # required like this to make it pip installable + "iopath>=0.1.7,<0.1.8", + "pycocotools>=2.0.2", # corresponds to https://github.com/ppwwyyxx/cocoapi + "future", # used by caffe2 + "pydot", # used to save caffe2 SVGs + "dataclasses; python_version<'3.7'", + "omegaconf==2.3.0", + # When adding to the list, may need to update docs/requirements.txt + # or add mock in docs/conf.py + ], + extras_require={ + "all": [ + "shapely", + "psutil", + "hydra-core", + "panopticapi @ https://github.com/cocodataset/panopticapi/archive/master.zip", + ], + "dev": [ + "flake8==3.8.1", + "isort==4.3.21", + "black==20.8b1", + "flake8-bugbear", + "flake8-comprehensions", + ], + }, + ext_modules=get_extensions(), + cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, +) diff --git a/src/sts/tests/README.md b/src/sts/tests/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f560384045ab4f6bc2beabef1170308fca117eb3 --- /dev/null +++ b/src/sts/tests/README.md @@ -0,0 +1,9 @@ +## Unit Tests + +To run the unittests, do: +``` +cd detectron2 +python -m unittest discover -v -s ./tests +``` + +There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev). diff --git a/src/sts/tests/__init__.py b/src/sts/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9020c2df23e2af280b7bb168b996ae9eaf312eb8 --- /dev/null +++ b/src/sts/tests/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Facebook, Inc. and its affiliates. diff --git a/src/sts/tests/layers/__init__.py b/src/sts/tests/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/sts/tests/layers/test_blocks.py b/src/sts/tests/layers/test_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..5a0488adbfcf0c7eca08616f43ebf695acad4b7e --- /dev/null +++ b/src/sts/tests/layers/test_blocks.py @@ -0,0 +1,51 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import unittest +import torch +from torch import nn + +from detectron2.layers import ASPP, DepthwiseSeparableConv2d, FrozenBatchNorm2d +from detectron2.modeling.backbone.resnet import BasicStem, ResNet + + +""" +Test for misc layers. 
+""" + + +class TestBlocks(unittest.TestCase): + def test_separable_conv(self): + DepthwiseSeparableConv2d(3, 10, norm1="BN", activation1=nn.PReLU()) + + def test_aspp(self): + m = ASPP(3, 10, [2, 3, 4], norm="", activation=nn.PReLU()) + self.assertIsNot(m.convs[0].activation.weight, m.convs[1].activation.weight) + self.assertIsNot(m.convs[0].activation.weight, m.project.activation.weight) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_frozen_batchnorm_fp16(self): + from torch.cuda.amp import autocast + + C = 10 + input = torch.rand(1, C, 10, 10).cuda() + m = FrozenBatchNorm2d(C).cuda() + with autocast(): + output = m(input.half()) + self.assertEqual(output.dtype, torch.float16) + + # requires_grad triggers a different codepath + input.requires_grad_() + with autocast(): + output = m(input.half()) + self.assertEqual(output.dtype, torch.float16) + + def test_resnet_unused_stages(self): + resnet = ResNet(BasicStem(), ResNet.make_default_stages(18), out_features=["res2"]) + self.assertTrue(hasattr(resnet, "res2")) + self.assertFalse(hasattr(resnet, "res3")) + self.assertFalse(hasattr(resnet, "res5")) + + resnet = ResNet(BasicStem(), ResNet.make_default_stages(18), out_features=["res2", "res5"]) + self.assertTrue(hasattr(resnet, "res2")) + self.assertTrue(hasattr(resnet, "res4")) + self.assertTrue(hasattr(resnet, "res5")) diff --git a/src/sts/tests/layers/test_deformable.py b/src/sts/tests/layers/test_deformable.py new file mode 100644 index 0000000000000000000000000000000000000000..4c1543ed5ef37fec3bb0d9fe2247858ad9582a41 --- /dev/null +++ b/src/sts/tests/layers/test_deformable.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import numpy as np +import unittest +import torch + +from detectron2.layers import DeformConv, ModulatedDeformConv + + +class DeformableTest(unittest.TestCase): + @unittest.skipIf(not torch.cuda.is_available(), "Deformable not supported for cpu") + def test_forward_output(self): + device = torch.device("cuda") + N, C, H, W = shape = 1, 1, 5, 5 + kernel_size = 3 + padding = 1 + + inputs = torch.arange(np.prod(shape), dtype=torch.float32).reshape(*shape).to(device) + """ + 0 1 2 3 4 + 5 6 7 8 9 + 10 11 12 13 14 + 15 16 17 18 19 + 20 21 22 23 24 + """ + offset_channels = kernel_size * kernel_size * 2 + offset = torch.full((N, offset_channels, H, W), 0.5, dtype=torch.float32).to(device) + + # Test DCN v1 + deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) + deform.weight = torch.nn.Parameter(torch.ones_like(deform.weight)) + output = deform(inputs, offset) + output = output.detach().cpu().numpy() + deform_results = np.array( + [ + [30, 41.25, 48.75, 45, 28.75], + [62.25, 81, 90, 80.25, 50.25], + [99.75, 126, 135, 117.75, 72.75], + [105, 131.25, 138.75, 120, 73.75], + [71.75, 89.25, 93.75, 80.75, 49.5], + ] + ) + self.assertTrue(np.allclose(output.flatten(), deform_results.flatten())) + + # Test DCN v2 + mask_channels = kernel_size * kernel_size + mask = torch.full((N, mask_channels, H, W), 0.5, dtype=torch.float32).to(device) + modulate_deform = ModulatedDeformConv(C, C, kernel_size, padding=padding, bias=False).to( + device + ) + modulate_deform.weight = deform.weight + output = modulate_deform(inputs, offset, mask) + output = output.detach().cpu().numpy() + self.assertTrue(np.allclose(output.flatten(), deform_results.flatten() * 0.5)) + + def test_forward_output_on_cpu(self): + device = torch.device("cpu") + N, C, H, W = shape = 1, 1, 5, 5 + kernel_size = 3 + padding = 1 + + inputs 
= torch.arange(np.prod(shape), dtype=torch.float32).reshape(*shape).to(device) + + offset_channels = kernel_size * kernel_size * 2 + offset = torch.full((N, offset_channels, H, W), 0.5, dtype=torch.float32).to(device) + + # Test DCN v1 on cpu + deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) + deform.weight = torch.nn.Parameter(torch.ones_like(deform.weight)) + output = deform(inputs, offset) + output = output.detach().cpu().numpy() + deform_results = np.array( + [ + [30, 41.25, 48.75, 45, 28.75], + [62.25, 81, 90, 80.25, 50.25], + [99.75, 126, 135, 117.75, 72.75], + [105, 131.25, 138.75, 120, 73.75], + [71.75, 89.25, 93.75, 80.75, 49.5], + ] + ) + self.assertTrue(np.allclose(output.flatten(), deform_results.flatten())) + + @unittest.skipIf(not torch.cuda.is_available(), "This test requires gpu access") + def test_forward_output_on_cpu_equals_output_on_gpu(self): + N, C, H, W = shape = 2, 4, 10, 10 + kernel_size = 3 + padding = 1 + + for groups in [1, 2]: + inputs = torch.arange(np.prod(shape), dtype=torch.float32).reshape(*shape) + offset_channels = kernel_size * kernel_size * 2 + offset = torch.full((N, offset_channels, H, W), 0.5, dtype=torch.float32) + + deform_gpu = DeformConv( + C, C, kernel_size=kernel_size, padding=padding, groups=groups + ).to("cuda") + deform_gpu.weight = torch.nn.Parameter(torch.ones_like(deform_gpu.weight)) + output_gpu = deform_gpu(inputs.to("cuda"), offset.to("cuda")).detach().cpu().numpy() + + deform_cpu = DeformConv( + C, C, kernel_size=kernel_size, padding=padding, groups=groups + ).to("cpu") + deform_cpu.weight = torch.nn.Parameter(torch.ones_like(deform_cpu.weight)) + output_cpu = deform_cpu(inputs.to("cpu"), offset.to("cpu")).detach().numpy() + + self.assertTrue(np.allclose(output_gpu.flatten(), output_cpu.flatten())) + + @unittest.skipIf(not torch.cuda.is_available(), "Deformable not supported for cpu") + def test_small_input(self): + device = torch.device("cuda") + for kernel_size in [3, 5]: + padding = kernel_size // 2 + N, C, H, W = shape = (1, 1, kernel_size - 1, kernel_size - 1) + + inputs = torch.rand(shape).to(device) # input size is smaller than kernel size + + offset_channels = kernel_size * kernel_size * 2 + offset = torch.randn((N, offset_channels, H, W), dtype=torch.float32).to(device) + deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) + output = deform(inputs, offset) + self.assertTrue(output.shape == inputs.shape) + + mask_channels = kernel_size * kernel_size + mask = torch.ones((N, mask_channels, H, W), dtype=torch.float32).to(device) + modulate_deform = ModulatedDeformConv( + C, C, kernel_size, padding=padding, bias=False + ).to(device) + output = modulate_deform(inputs, offset, mask) + self.assertTrue(output.shape == inputs.shape) + + @unittest.skipIf(not torch.cuda.is_available(), "Deformable not supported for cpu") + def test_raise_exception(self): + device = torch.device("cuda") + N, C, H, W = shape = 1, 1, 3, 3 + kernel_size = 3 + padding = 1 + + inputs = torch.rand(shape, dtype=torch.float32).to(device) + offset_channels = kernel_size * kernel_size # This is wrong channels for offset + offset = torch.randn((N, offset_channels, H, W), dtype=torch.float32).to(device) + deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) + self.assertRaises(RuntimeError, deform, inputs, offset) + + offset_channels = kernel_size * kernel_size * 2 + offset = torch.randn((N, offset_channels, H, W), dtype=torch.float32).to(device) + mask_channels = kernel_size * 
kernel_size * 2 # This is wrong channels for mask + mask = torch.ones((N, mask_channels, H, W), dtype=torch.float32).to(device) + modulate_deform = ModulatedDeformConv(C, C, kernel_size, padding=padding, bias=False).to( + device + ) + self.assertRaises(RuntimeError, modulate_deform, inputs, offset, mask) + + def test_repr(self): + module = DeformConv(3, 10, kernel_size=3, padding=1, deformable_groups=2) + correct_string = ( + "DeformConv(in_channels=3, out_channels=10, kernel_size=(3, 3), " + "stride=(1, 1), padding=(1, 1), dilation=(1, 1), " + "groups=1, deformable_groups=2, bias=False)" + ) + self.assertEqual(repr(module), correct_string) + + module = ModulatedDeformConv(3, 10, kernel_size=3, padding=1, deformable_groups=2) + correct_string = ( + "ModulatedDeformConv(in_channels=3, out_channels=10, kernel_size=(3, 3), " + "stride=1, padding=1, dilation=1, groups=1, deformable_groups=2, bias=True)" + ) + self.assertEqual(repr(module), correct_string) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/layers/test_mask_ops.py b/src/sts/tests/layers/test_mask_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..9236ce83e525abc63b1371f88676b1d53901ca4a --- /dev/null +++ b/src/sts/tests/layers/test_mask_ops.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. + +import contextlib +import io +import numpy as np +import unittest +from collections import defaultdict +import torch +import tqdm +from fvcore.common.benchmark import benchmark +from pycocotools.coco import COCO +from tabulate import tabulate +from torch.nn import functional as F + +from detectron2.data import MetadataCatalog +from detectron2.layers.mask_ops import ( + pad_masks, + paste_mask_in_image_old, + paste_masks_in_image, + scale_boxes, +) +from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks +from detectron2.structures.masks import polygons_to_bitmask +from detectron2.utils.file_io import PathManager +from detectron2.utils.testing import random_boxes + + +def iou_between_full_image_bit_masks(a, b): + intersect = (a & b).sum() + union = (a | b).sum() + return intersect / union + + +def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5): + x0, y0, x1, y1 = box[0], box[1], box[2], box[3] + + img_h, img_w = full_image_bit_mask.shape + + mask_y = np.arange(0.0, mask_size) + 0.5 # mask y sample coords in [0.5, mask_size - 0.5] + mask_x = np.arange(0.0, mask_size) + 0.5 # mask x sample coords in [0.5, mask_size - 0.5] + mask_y = mask_y / mask_size * (y1 - y0) + y0 + mask_x = mask_x / mask_size * (x1 - x0) + x0 + + mask_x = (mask_x - 0.5) / (img_w - 1) * 2 + -1 + mask_y = (mask_y - 0.5) / (img_h - 1) * 2 + -1 + gy, gx = torch.meshgrid(torch.from_numpy(mask_y), torch.from_numpy(mask_x)) + ind = torch.stack([gx, gy], dim=-1).to(dtype=torch.float32) + + full_image_bit_mask = torch.from_numpy(full_image_bit_mask) + mask = F.grid_sample( + full_image_bit_mask[None, None, :, :].to(dtype=torch.float32), + ind[None, :, :, :], + align_corners=True, + ) + + return mask[0, 0] >= threshold + + +class TestMaskCropPaste(unittest.TestCase): + def setUp(self): + json_file = MetadataCatalog.get("coco_2017_val_100").json_file + if not PathManager.isfile(json_file): + raise unittest.SkipTest("{} not found".format(json_file)) + with contextlib.redirect_stdout(io.StringIO()): + json_file = PathManager.get_local_path(json_file) + self.coco = COCO(json_file) + + def test_crop_paste_consistency(self): + """ + 
rasterize_polygons_within_box (used in training) + and + paste_masks_in_image (used in inference) + should be inverse operations to each other. + + This function runs several implementation of the above two operations and prints + the reconstruction error. + """ + + anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False)) # avoid crowd annotations + + selected_anns = anns[:100] + + ious = [] + for ann in tqdm.tqdm(selected_anns): + results = self.process_annotation(ann) + ious.append([k[2] for k in results]) + + ious = np.array(ious) + mean_ious = ious.mean(axis=0) + table = [] + res_dic = defaultdict(dict) + for row, iou in zip(results, mean_ious): + table.append((row[0], row[1], iou)) + res_dic[row[0]][row[1]] = iou + print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple")) + # assert that the reconstruction is good: + self.assertTrue(res_dic["polygon"]["aligned"] > 0.94) + self.assertTrue(res_dic["roialign"]["aligned"] > 0.95) + + def process_annotation(self, ann, mask_side_len=28): + # Parse annotation data + img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0] + height, width = img_info["height"], img_info["width"] + gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]] + gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width) + + # Run rasterize .. + torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4) + box_bitmasks = { + "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0], + "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len), + "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize( + torch_gt_bbox, mask_side_len + )[0], + } + + # Run paste .. 
+ results = defaultdict(dict) + for k, box_bitmask in box_bitmasks.items(): + padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1) + scaled_boxes = scale_boxes(torch_gt_bbox, scale) + + r = results[k] + r["old"] = paste_mask_in_image_old( + padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5 + ) + r["aligned"] = paste_masks_in_image( + box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width) + )[0] + + table = [] + for rasterize_method, r in results.items(): + for paste_method, mask in r.items(): + mask = np.asarray(mask) + iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask) + table.append((rasterize_method, paste_method, iou)) + return table + + def test_polygon_area(self): + # Draw polygon boxes + for d in [5.0, 10.0, 1000.0]: + polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]]) + area = polygon.area()[0] + target = d ** 2 + self.assertEqual(area, target) + + # Draw polygon triangles + for d in [5.0, 10.0, 1000.0]: + polygon = PolygonMasks([[[0, 0, 0, d, d, d]]]) + area = polygon.area()[0] + target = d ** 2 / 2 + self.assertEqual(area, target) + + def test_paste_mask_scriptable(self): + scripted_f = torch.jit.script(paste_masks_in_image) + N = 10 + masks = torch.rand(N, 28, 28) + boxes = Boxes(random_boxes(N, 100)) + image_shape = (150, 150) + + out = paste_masks_in_image(masks, boxes, image_shape) + scripted_out = scripted_f(masks, boxes, image_shape) + self.assertTrue(torch.equal(out, scripted_out)) + + +def benchmark_paste(): + S = 800 + H, W = image_shape = (S, S) + N = 64 + torch.manual_seed(42) + masks = torch.rand(N, 28, 28) + + center = torch.rand(N, 2) * 600 + 100 + wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50) + x0y0 = torch.clamp(center - wh * 0.5, min=0.0) + x1y1 = torch.clamp(center + wh * 0.5, max=S) + boxes = Boxes(torch.cat([x0y0, x1y1], axis=1)) + + def func(device, n=3): + m = masks.to(device=device) + b = boxes.to(device=device) + + def bench(): + for _ in range(n): + paste_masks_in_image(m, b, image_shape) + if device.type == "cuda": + torch.cuda.synchronize() + + return bench + + specs = [{"device": torch.device("cpu"), "n": 3}] + if torch.cuda.is_available(): + specs.append({"device": torch.device("cuda"), "n": 3}) + + benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2) + + +if __name__ == "__main__": + benchmark_paste() + unittest.main() diff --git a/src/sts/tests/layers/test_nms.py b/src/sts/tests/layers/test_nms.py new file mode 100644 index 0000000000000000000000000000000000000000..a042db6147f110a82597c98f38e6b2221ccad53c --- /dev/null +++ b/src/sts/tests/layers/test_nms.py @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
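+#
+# NOTE: a minimal usage sketch of batched_nms as exercised by the test below;
+# the per-class behaviour described here is an assumption based on how the
+# test passes class ids in `idxs`. batched_nms(boxes, scores, idxs, iou_thr)
+# suppresses overlapping boxes only within the same class id and returns the
+# indices of the kept boxes, e.g.:
+#
+#   boxes  = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.]])
+#   scores = torch.tensor([0.9, 0.8])
+#   idxs   = torch.tensor([0, 0])               # same class -> boxes compete
+#   keep   = batched_nms(boxes, scores, idxs, 0.5)
+#   # IoU of the two boxes is ~0.68 > 0.5, so only index 0 survives;
+#   # with idxs = torch.tensor([0, 1]) both boxes would be kept.
+#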
+from __future__ import absolute_import, division, print_function, unicode_literals +import unittest +import torch + +from detectron2.layers import batched_nms +from detectron2.utils.testing import random_boxes + + +class TestNMS(unittest.TestCase): + def _create_tensors(self, N): + boxes = random_boxes(N, 200) + scores = torch.rand(N) + return boxes, scores + + def test_nms_scriptability(self): + N = 2000 + num_classes = 50 + boxes, scores = self._create_tensors(N) + idxs = torch.randint(0, num_classes, (N,)) + scripted_batched_nms = torch.jit.script(batched_nms) + err_msg = "NMS is incompatible with jit-scripted NMS for IoU={}" + + for iou in [0.2, 0.5, 0.8]: + keep_ref = batched_nms(boxes, scores, idxs, iou) + backup = boxes.clone() + scripted_keep = scripted_batched_nms(boxes, scores, idxs, iou) + assert torch.allclose(boxes, backup), "boxes modified by jit-scripted batched_nms" + self.assertTrue(torch.equal(keep_ref, scripted_keep), err_msg.format(iou)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/layers/test_nms_rotated.py b/src/sts/tests/layers/test_nms_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..ac7fb2442db3a713effbdc684133b61050a26a4b --- /dev/null +++ b/src/sts/tests/layers/test_nms_rotated.py @@ -0,0 +1,175 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import absolute_import, division, print_function, unicode_literals +import numpy as np +import unittest +from copy import deepcopy +import torch +from torchvision import ops + +from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.testing import random_boxes + + +def nms_edit_distance(keep1, keep2): + """ + Compare the "keep" result of two nms call. + They are allowed to be different in terms of edit distance + due to floating point precision issues, e.g., + if a box happen to have an IoU of 0.5 with another box, + one implentation may choose to keep it while another may discard it. + """ + keep1, keep2 = keep1.cpu(), keep2.cpu() + if torch.equal(keep1, keep2): + # they should be equal most of the time + return 0 + keep1, keep2 = tuple(keep1), tuple(keep2) + m, n = len(keep1), len(keep2) + + # edit distance with DP + f = [np.arange(n + 1), np.arange(n + 1)] + for i in range(m): + cur_row = i % 2 + other_row = (i + 1) % 2 + f[other_row][0] = i + 1 + for j in range(n): + f[other_row][j + 1] = ( + f[cur_row][j] + if keep1[i] == keep2[j] + else min(min(f[cur_row][j], f[cur_row][j + 1]), f[other_row][j]) + 1 + ) + return f[m % 2][n] + + +class TestNMSRotated(unittest.TestCase): + def reference_horizontal_nms(self, boxes, scores, iou_threshold): + """ + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + (Note here 5 == 4 + 1, i.e., 4-dim horizontal box + 1-dim prob) + iou_threshold: intersection over union threshold. 
+ Returns: + picked: a list of indexes of the kept boxes + """ + picked = [] + _, indexes = scores.sort(descending=True) + while len(indexes) > 0: + current = indexes[0] + picked.append(current.item()) + if len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[1:] + rest_boxes = boxes[indexes, :] + iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1) + indexes = indexes[iou <= iou_threshold] + + return torch.as_tensor(picked) + + def _create_tensors(self, N, device="cpu"): + boxes = random_boxes(N, 200, device=device) + scores = torch.rand(N, device=device) + return boxes, scores + + def test_batched_nms_rotated_0_degree_cpu(self, device="cpu"): + N = 2000 + num_classes = 50 + boxes, scores = self._create_tensors(N, device=device) + idxs = torch.randint(0, num_classes, (N,)) + rotated_boxes = torch.zeros(N, 5, device=device) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" + for iou in [0.2, 0.5, 0.8]: + backup = boxes.clone() + keep_ref = batched_nms(boxes, scores, idxs, iou) + assert torch.allclose(boxes, backup), "boxes modified by batched_nms" + backup = rotated_boxes.clone() + keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou) + assert torch.allclose( + rotated_boxes, backup + ), "rotated_boxes modified by batched_nms_rotated" + # Occasionally the gap can be large if there are many IOU on the threshold boundary + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 5, err_msg.format(iou)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_batched_nms_rotated_0_degree_cuda(self): + self.test_batched_nms_rotated_0_degree_cpu(device="cuda") + + def test_nms_rotated_0_degree_cpu(self, device="cpu"): + N = 1000 + boxes, scores = self._create_tensors(N, device=device) + rotated_boxes = torch.zeros(N, 5, device=device) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" + for iou in [0.2, 0.5, 0.8]: + keep_ref = self.reference_horizontal_nms(boxes, scores, iou) + keep = nms_rotated(rotated_boxes, scores, iou) + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_nms_rotated_0_degree_cuda(self): + self.test_nms_rotated_0_degree_cpu(device="cuda") + + def test_nms_rotated_90_degrees_cpu(self): + N = 1000 + boxes, scores = self._create_tensors(N) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + # Note for rotated_boxes[:, 2] and rotated_boxes[:, 3]: + # widths and heights are intentionally swapped here for 90 degrees case + # so that the reference horizontal nms could be used + rotated_boxes[:, 2] = boxes[:, 3] - boxes[:, 1] + rotated_boxes[:, 3] = boxes[:, 2] - boxes[:, 0] + + rotated_boxes[:, 4] = torch.ones(N) * 90 + err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" + for iou in [0.2, 0.5, 0.8]: + keep_ref = self.reference_horizontal_nms(boxes, scores, 
iou) + keep = nms_rotated(rotated_boxes, scores, iou) + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) + + def test_nms_rotated_180_degrees_cpu(self): + N = 1000 + boxes, scores = self._create_tensors(N) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + rotated_boxes[:, 4] = torch.ones(N) * 180 + err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" + for iou in [0.2, 0.5, 0.8]: + keep_ref = self.reference_horizontal_nms(boxes, scores, iou) + keep = nms_rotated(rotated_boxes, scores, iou) + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) + + +class TestScriptable(unittest.TestCase): + def setUp(self): + class TestingModule(torch.nn.Module): + def forward(self, boxes, scores, threshold): + return nms_rotated(boxes, scores, threshold) + + self.module = TestingModule() + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_scriptable_cpu(self): + m = deepcopy(self.module).cpu() + _ = torch.jit.script(m) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_scriptable_cuda(self): + m = deepcopy(self.module).cuda() + _ = torch.jit.script(m) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/layers/test_roi_align.py b/src/sts/tests/layers/test_roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..b6fd8edefd107b727e3e523f1364fea1f4a20576 --- /dev/null +++ b/src/sts/tests/layers/test_roi_align.py @@ -0,0 +1,210 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
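+#
+# NOTE: context for the aligned=True vs aligned=False comparison in
+# test_forward_output below (a sketch, not authoritative documentation).
+# With aligned=False the ROI coordinates are used as-is, which effectively
+# shifts every sample by half a pixel; with aligned=True a 0.5 offset is
+# subtracted first, consistent with the model that pixel i spans [i, i + 1]
+# and has its center at i + 0.5. On the 5x5 ramp input and box [1, 1, 3, 3],
+# this half-pixel shift in both x and y accounts for the constant gap of 3
+# between the expected "old_results" and "correct_results" tables.
+#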
+import numpy as np +import unittest +from copy import copy +import cv2 +import torch +from fvcore.common.benchmark import benchmark +from torch.nn import functional as F + +from detectron2.layers.roi_align import ROIAlign, roi_align + + +class ROIAlignTest(unittest.TestCase): + def test_forward_output(self): + input = np.arange(25).reshape(5, 5).astype("float32") + """ + 0 1 2 3 4 + 5 6 7 8 9 + 10 11 12 13 14 + 15 16 17 18 19 + 20 21 22 23 24 + """ + + output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False) + output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True) + + # without correction: + old_results = [ + [7.5, 8, 8.5, 9], + [10, 10.5, 11, 11.5], + [12.5, 13, 13.5, 14], + [15, 15.5, 16, 16.5], + ] + + # with 0.5 correction: + correct_results = [ + [4.5, 5.0, 5.5, 6.0], + [7.0, 7.5, 8.0, 8.5], + [9.5, 10.0, 10.5, 11.0], + [12.0, 12.5, 13.0, 13.5], + ] + # This is an upsampled version of [[6, 7], [11, 12]] + + self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten())) + self.assertTrue( + np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten()) + ) + + # Also see similar issues in tensorflow at + # https://github.com/tensorflow/tensorflow/issues/26278 + + def test_resize(self): + H, W = 30, 30 + input = np.random.rand(H, W).astype("float32") * 100 + box = [10, 10, 20, 20] + output = self._simple_roialign(input, box, (5, 5), aligned=True) + + input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) + box2x = [x / 2 for x in box] + output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True) + diff = np.abs(output2x - output) + self.assertTrue(diff.max() < 1e-4) + + def test_grid_sample_equivalence(self): + H, W = 30, 30 + input = np.random.rand(H, W).astype("float32") * 100 + box = [10, 10, 20, 20] + for ratio in [1, 2, 3]: + output = self._simple_roialign(input, box, (5, 5), sampling_ratio=ratio) + output_grid_sample = grid_sample_roi_align( + torch.from_numpy(input[None, None, :, :]).float(), + torch.as_tensor(box).float()[None, :], + 5, + 1.0, + ratio, + ) + self.assertTrue(torch.allclose(output, output_grid_sample)) + + def _simple_roialign(self, img, box, resolution, sampling_ratio=0, aligned=True): + """ + RoiAlign with scale 1.0. 
+ """ + if isinstance(resolution, int): + resolution = (resolution, resolution) + op = ROIAlign(resolution, 1.0, sampling_ratio, aligned=aligned) + input = torch.from_numpy(img[None, None, :, :].astype("float32")) + + rois = [0] + list(box) + rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) + output = op.forward(input, rois) + if torch.cuda.is_available(): + output_cuda = op.forward(input.cuda(), rois.cuda()).cpu() + self.assertTrue(torch.allclose(output, output_cuda)) + return output[0, 0] + + def _simple_roialign_with_grad(self, img, box, resolution, device): + if isinstance(resolution, int): + resolution = (resolution, resolution) + + op = ROIAlign(resolution, 1.0, 0, aligned=True) + input = torch.from_numpy(img[None, None, :, :].astype("float32")) + + rois = [0] + list(box) + rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) + input = input.to(device=device) + rois = rois.to(device=device) + input.requires_grad = True + output = op.forward(input, rois) + return input, output + + def test_empty_box(self): + img = np.random.rand(5, 5) + box = [3, 4, 5, 4] + o = self._simple_roialign(img, box, 7) + self.assertTrue(o.shape == (7, 7)) + self.assertTrue((o == 0).all()) + + for dev in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + input, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev)) + output.sum().backward() + self.assertTrue(torch.allclose(input.grad, torch.zeros_like(input))) + + def test_empty_batch(self): + input = torch.zeros(0, 3, 10, 10, dtype=torch.float32) + rois = torch.zeros(0, 5, dtype=torch.float32) + op = ROIAlign((7, 7), 1.0, 0, aligned=True) + output = op.forward(input, rois) + self.assertTrue(output.shape == (0, 3, 7, 7)) + + +def grid_sample_roi_align(input, boxes, output_size, scale, sampling_ratio): + # unlike true roi_align, this does not support different batch_idx + from detectron2.projects.point_rend.point_features import ( + generate_regular_grid_point_coords, + get_point_coords_wrt_image, + point_sample, + ) + + N, _, H, W = input.shape + R = len(boxes) + assert N == 1 + boxes = boxes * scale + grid = generate_regular_grid_point_coords(R, output_size * sampling_ratio, device=boxes.device) + coords = get_point_coords_wrt_image(boxes, grid) + coords = coords / torch.as_tensor([W, H], device=coords.device) # R, s^2, 2 + res = point_sample(input, coords.unsqueeze(0), align_corners=False) # 1,C, R,s^2 + res = ( + res.squeeze(0) + .permute(1, 0, 2) + .reshape(R, -1, output_size * sampling_ratio, output_size * sampling_ratio) + ) + res = F.avg_pool2d(res, sampling_ratio) + return res + + +def benchmark_roi_align(): + def random_boxes(mean_box, stdev, N, maxsize): + ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) + ret.clamp_(min=0, max=maxsize) + return ret + + def func(shape, nboxes_per_img, sampling_ratio, device, box_size="large"): + N, _, H, _ = shape + input = torch.rand(*shape) + boxes = [] + batch_idx = [] + for k in range(N): + if box_size == "large": + b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H) + else: + b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H) + boxes.append(b) + batch_idx.append(torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k) + boxes = torch.cat(boxes, axis=0) + batch_idx = torch.cat(batch_idx, axis=0) + boxes = torch.cat([batch_idx, boxes], axis=1) + + input = input.to(device=device) + boxes = boxes.to(device=device) + + def bench(): + if False and sampling_ratio > 0 and N == 1: + # enable to benchmark grid_sample 
(slower) + grid_sample_roi_align(input, boxes[:, 1:], 7, 1.0, sampling_ratio) + else: + roi_align(input, boxes, 7, 1.0, sampling_ratio, True) + if device == "cuda": + torch.cuda.synchronize() + + return bench + + def gen_args(arg): + args = [] + for size in ["small", "large"]: + for ratio in [0, 2]: + args.append(copy(arg)) + args[-1]["sampling_ratio"] = ratio + args[-1]["box_size"] = size + return args + + arg = dict(shape=(1, 512, 256, 256), nboxes_per_img=512, device="cuda") + benchmark(func, "cuda_roialign", gen_args(arg), num_iters=20, warmup_iters=1) + arg.update({"device": "cpu", "shape": (1, 256, 128, 128)}) + benchmark(func, "cpu_roialign", gen_args(arg), num_iters=5, warmup_iters=1) + + +if __name__ == "__main__": + if torch.cuda.is_available(): + benchmark_roi_align() + unittest.main() diff --git a/src/sts/tests/layers/test_roi_align_rotated.py b/src/sts/tests/layers/test_roi_align_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..7323d7d5a86816f337571221313c428238c439f4 --- /dev/null +++ b/src/sts/tests/layers/test_roi_align_rotated.py @@ -0,0 +1,176 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import unittest +import cv2 +import torch +from torch.autograd import Variable, gradcheck + +from detectron2.layers.roi_align import ROIAlign +from detectron2.layers.roi_align_rotated import ROIAlignRotated + +logger = logging.getLogger(__name__) + + +class ROIAlignRotatedTest(unittest.TestCase): + def _box_to_rotated_box(self, box, angle): + return [ + (box[0] + box[2]) / 2.0, + (box[1] + box[3]) / 2.0, + box[2] - box[0], + box[3] - box[1], + angle, + ] + + def _rot90(self, img, num): + num = num % 4 # note: -1 % 4 == 3 + for _ in range(num): + img = img.transpose(0, 1).flip(0) + return img + + def test_forward_output_0_90_180_270(self): + for i in range(4): + # i = 0, 1, 2, 3 corresponding to 0, 90, 180, 270 degrees + img = torch.arange(25, dtype=torch.float32).reshape(5, 5) + """ + 0 1 2 3 4 + 5 6 7 8 9 + 10 11 12 13 14 + 15 16 17 18 19 + 20 21 22 23 24 + """ + box = [1, 1, 3, 3] + rotated_box = self._box_to_rotated_box(box=box, angle=90 * i) + + result = self._simple_roi_align_rotated(img=img, box=rotated_box, resolution=(4, 4)) + + # Here's an explanation for 0 degree case: + # point 0 in the original input lies at [0.5, 0.5] + # (the center of bin [0, 1] x [0, 1]) + # point 1 in the original input lies at [1.5, 0.5], etc. 
+ # since the resolution is (4, 4) that divides [1, 3] x [1, 3] + # into 4 x 4 equal bins, + # the top-left bin is [1, 1.5] x [1, 1.5], and its center + # (1.25, 1.25) lies at the 3/4 position + # between point 0 and point 1, point 5 and point 6, + # point 0 and point 5, point 1 and point 6, so it can be calculated as + # 0.25*(0*0.25+1*0.75)+(5*0.25+6*0.75)*0.75 = 4.5 + result_expected = torch.tensor( + [ + [4.5, 5.0, 5.5, 6.0], + [7.0, 7.5, 8.0, 8.5], + [9.5, 10.0, 10.5, 11.0], + [12.0, 12.5, 13.0, 13.5], + ] + ) + # This is also an upsampled version of [[6, 7], [11, 12]] + + # When the box is rotated by 90 degrees CCW, + # the result would be rotated by 90 degrees CW, thus it's -i here + result_expected = self._rot90(result_expected, -i) + + assert torch.allclose(result, result_expected) + + def test_resize(self): + H, W = 30, 30 + input = torch.rand(H, W) * 100 + box = [10, 10, 20, 20] + rotated_box = self._box_to_rotated_box(box, angle=0) + output = self._simple_roi_align_rotated(img=input, box=rotated_box, resolution=(5, 5)) + + input2x = cv2.resize(input.numpy(), (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) + input2x = torch.from_numpy(input2x) + box2x = [x / 2 for x in box] + rotated_box2x = self._box_to_rotated_box(box2x, angle=0) + output2x = self._simple_roi_align_rotated(img=input2x, box=rotated_box2x, resolution=(5, 5)) + assert torch.allclose(output2x, output) + + def _simple_roi_align_rotated(self, img, box, resolution): + """ + RoiAlignRotated with scale 1.0 and 0 sample ratio. + """ + op = ROIAlignRotated(output_size=resolution, spatial_scale=1.0, sampling_ratio=0) + input = img[None, None, :, :] + + rois = [0] + list(box) + rois = torch.tensor(rois, dtype=torch.float32)[None, :] + result_cpu = op.forward(input, rois) + if torch.cuda.is_available(): + result_cuda = op.forward(input.cuda(), rois.cuda()) + assert torch.allclose(result_cpu, result_cuda.cpu()) + return result_cpu[0, 0] + + def test_empty_box(self): + img = torch.rand(5, 5) + out = self._simple_roi_align_rotated(img, [2, 3, 0, 0, 0], (7, 7)) + self.assertTrue((out == 0).all()) + + def test_roi_align_rotated_gradcheck_cpu(self): + dtype = torch.float64 + device = torch.device("cpu") + roi_align_rotated_op = ROIAlignRotated( + output_size=(5, 5), spatial_scale=0.5, sampling_ratio=1 + ).to(dtype=dtype, device=device) + x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) + # roi format is (batch index, x_center, y_center, width, height, angle) + rois = torch.tensor( + [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], + dtype=dtype, + device=device, + ) + + def func(input): + return roi_align_rotated_op(input, rois) + + assert gradcheck(func, (x,)), "gradcheck failed for RoIAlignRotated CPU" + assert gradcheck(func, (x.transpose(2, 3),)), "gradcheck failed for RoIAlignRotated CPU" + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_roi_align_rotated_gradient_cuda(self): + """ + Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU, + and compare the result with ROIAlign + """ + # torch.manual_seed(123) + dtype = torch.float64 + device = torch.device("cuda") + pool_h, pool_w = (5, 5) + + roi_align = ROIAlign(output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to( + device=device + ) + + roi_align_rotated = ROIAlignRotated( + output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2 + ).to(device=device) + + x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) + # x_rotated 
= x.clone() won't work (will lead to grad_fun=CloneBackward)! + x_rotated = Variable(x.data.clone(), requires_grad=True) + + # roi_rotated format is (batch index, x_center, y_center, width, height, angle) + rois_rotated = torch.tensor( + [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], + dtype=dtype, + device=device, + ) + + y_rotated = roi_align_rotated(x_rotated, rois_rotated) + s_rotated = y_rotated.sum() + s_rotated.backward() + + # roi format is (batch index, x1, y1, x2, y2) + rois = torch.tensor( + [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]], dtype=dtype, device=device + ) + + y = roi_align(x, rois) + s = y.sum() + s.backward() + + assert torch.allclose( + x.grad, x_rotated.grad + ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA" + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/modeling/__init__.py b/src/sts/tests/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/sts/tests/modeling/test_anchor_generator.py b/src/sts/tests/modeling/test_anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..13a808e587382216da6fe7ee957603f448172657 --- /dev/null +++ b/src/sts/tests/modeling/test_anchor_generator.py @@ -0,0 +1,120 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import unittest +import torch + +from detectron2.config import get_cfg +from detectron2.layers import ShapeSpec +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator, RotatedAnchorGenerator + +logger = logging.getLogger(__name__) + + +class TestAnchorGenerator(unittest.TestCase): + def test_default_anchor_generator(self): + cfg = get_cfg() + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] + + anchor_generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=4)]) + + # only the last two dimensions of features matter here + num_images = 2 + features = {"stage3": torch.rand(num_images, 96, 1, 2)} + anchors = anchor_generator([features["stage3"]]) + expected_anchor_tensor = torch.tensor( + [ + [-32.0, -8.0, 32.0, 8.0], + [-16.0, -16.0, 16.0, 16.0], + [-8.0, -32.0, 8.0, 32.0], + [-64.0, -16.0, 64.0, 16.0], + [-32.0, -32.0, 32.0, 32.0], + [-16.0, -64.0, 16.0, 64.0], + [-28.0, -8.0, 36.0, 8.0], # -28.0 == -32.0 + STRIDE (4) + [-12.0, -16.0, 20.0, 16.0], + [-4.0, -32.0, 12.0, 32.0], + [-60.0, -16.0, 68.0, 16.0], + [-28.0, -32.0, 36.0, 32.0], + [-12.0, -64.0, 20.0, 64.0], + ] + ) + + self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) + + def test_default_anchor_generator_centered(self): + # test explicit args + anchor_generator = DefaultAnchorGenerator( + sizes=[32, 64], aspect_ratios=[0.25, 1, 4], strides=[4] + ) + + # only the last two dimensions of features matter here + num_images = 2 + features = {"stage3": torch.rand(num_images, 96, 1, 2)} + expected_anchor_tensor = torch.tensor( + [ + [-30.0, -6.0, 34.0, 10.0], + [-14.0, -14.0, 18.0, 18.0], + [-6.0, -30.0, 10.0, 34.0], + [-62.0, -14.0, 66.0, 18.0], + [-30.0, -30.0, 34.0, 34.0], + [-14.0, -62.0, 18.0, 66.0], + [-26.0, -6.0, 38.0, 10.0], + [-10.0, -14.0, 22.0, 18.0], + [-2.0, -30.0, 14.0, 34.0], + [-58.0, -14.0, 70.0, 18.0], + [-26.0, -30.0, 38.0, 34.0], + [-10.0, -62.0, 22.0, 66.0], + ] + ) + + anchors = anchor_generator([features["stage3"]]) + self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) + + anchors = 
torch.jit.script(anchor_generator)([features["stage3"]]) + self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) + + def test_rrpn_anchor_generator(self): + cfg = get_cfg() + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] + cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [0, 45] # test single list[float] + anchor_generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=4)]) + + # only the last two dimensions of features matter here + num_images = 2 + features = {"stage3": torch.rand(num_images, 96, 1, 2)} + anchors = anchor_generator([features["stage3"]]) + expected_anchor_tensor = torch.tensor( + [ + [0.0, 0.0, 64.0, 16.0, 0.0], + [0.0, 0.0, 64.0, 16.0, 45.0], + [0.0, 0.0, 32.0, 32.0, 0.0], + [0.0, 0.0, 32.0, 32.0, 45.0], + [0.0, 0.0, 16.0, 64.0, 0.0], + [0.0, 0.0, 16.0, 64.0, 45.0], + [0.0, 0.0, 128.0, 32.0, 0.0], + [0.0, 0.0, 128.0, 32.0, 45.0], + [0.0, 0.0, 64.0, 64.0, 0.0], + [0.0, 0.0, 64.0, 64.0, 45.0], + [0.0, 0.0, 32.0, 128.0, 0.0], + [0.0, 0.0, 32.0, 128.0, 45.0], + [4.0, 0.0, 64.0, 16.0, 0.0], # 4.0 == 0.0 + STRIDE (4) + [4.0, 0.0, 64.0, 16.0, 45.0], + [4.0, 0.0, 32.0, 32.0, 0.0], + [4.0, 0.0, 32.0, 32.0, 45.0], + [4.0, 0.0, 16.0, 64.0, 0.0], + [4.0, 0.0, 16.0, 64.0, 45.0], + [4.0, 0.0, 128.0, 32.0, 0.0], + [4.0, 0.0, 128.0, 32.0, 45.0], + [4.0, 0.0, 64.0, 64.0, 0.0], + [4.0, 0.0, 64.0, 64.0, 45.0], + [4.0, 0.0, 32.0, 128.0, 0.0], + [4.0, 0.0, 32.0, 128.0, 45.0], + ] + ) + + self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/modeling/test_backbone.py b/src/sts/tests/modeling/test_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..5d2a00ec189a536db3c309440085d40c3db39b3c --- /dev/null +++ b/src/sts/tests/modeling/test_backbone.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import unittest +import torch + +import detectron2.export.torchscript # apply patch # noqa +from detectron2 import model_zoo +from detectron2.config import get_cfg +from detectron2.layers import ShapeSpec +from detectron2.modeling.backbone import build_resnet_backbone +from detectron2.modeling.backbone.fpn import build_resnet_fpn_backbone +from detectron2.utils.env import TORCH_VERSION + + +class TestBackBone(unittest.TestCase): + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_resnet_scriptability(self): + cfg = get_cfg() + resnet = build_resnet_backbone(cfg, ShapeSpec(channels=3)) + + scripted_resnet = torch.jit.script(resnet) + + inp = torch.rand(2, 3, 100, 100) + out1 = resnet(inp)["res4"] + out2 = scripted_resnet(inp)["res4"] + self.assertTrue(torch.allclose(out1, out2)) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_fpn_scriptability(self): + cfg = model_zoo.get_config("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") + bb = build_resnet_fpn_backbone(cfg, ShapeSpec(channels=3)) + bb_s = torch.jit.script(bb) + + inp = torch.rand(2, 3, 128, 128) + out1 = bb(inp)["p5"] + out2 = bb_s(inp)["p5"] + self.assertTrue(torch.allclose(out1, out2)) diff --git a/src/sts/tests/modeling/test_box2box_transform.py b/src/sts/tests/modeling/test_box2box_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..60a9c2c3a2652d4a5d3704feb6d14f27cc8e3f67 --- /dev/null +++ b/src/sts/tests/modeling/test_box2box_transform.py @@ -0,0 +1,75 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
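+#
+# NOTE: the reconstruction tests below rely on get_deltas and apply_deltas
+# being exact inverses. For reference (standard R-CNN box parameterization,
+# assumed rather than quoted from the implementation), a target box g is
+# encoded against a source box p as
+#
+#   dx = wx * (gx - px) / pw        dw = ww * log(gw / pw)
+#   dy = wy * (gy - py) / ph        dh = wh * log(gh / ph)
+#
+# where (x, y) are box centers, (w, h) box sizes and (wx, wy, ww, wh) the
+# `weights` passed to the transform; apply_deltas inverts these equations,
+# so apply_deltas(get_deltas(src, dst), src) recovers dst up to float error.
+#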
+import logging +import unittest +import torch + +from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.testing import random_boxes + +logger = logging.getLogger(__name__) + + +class TestBox2BoxTransform(unittest.TestCase): + def test_reconstruction(self): + weights = (5, 5, 10, 10) + b2b_tfm = Box2BoxTransform(weights=weights) + src_boxes = random_boxes(10) + dst_boxes = random_boxes(10) + + devices = [torch.device("cpu")] + if torch.cuda.is_available(): + devices.append(torch.device("cuda")) + for device in devices: + src_boxes = src_boxes.to(device=device) + dst_boxes = dst_boxes.to(device=device) + deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) + dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) + self.assertTrue(torch.allclose(dst_boxes, dst_boxes_reconstructed)) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_apply_deltas_tracing(self): + weights = (5, 5, 10, 10) + b2b_tfm = Box2BoxTransform(weights=weights) + + with torch.no_grad(): + func = torch.jit.trace(b2b_tfm.apply_deltas, (torch.randn(10, 20), torch.randn(10, 4))) + + o = func(torch.randn(10, 20), torch.randn(10, 4)) + self.assertEqual(o.shape, (10, 20)) + o = func(torch.randn(5, 20), torch.randn(5, 4)) + self.assertEqual(o.shape, (5, 20)) + + +def random_rotated_boxes(mean_box, std_length, std_angle, N): + return torch.cat( + [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 + ) + torch.tensor(mean_box, dtype=torch.float) + + +class TestBox2BoxTransformRotated(unittest.TestCase): + def test_reconstruction(self): + weights = (5, 5, 10, 10, 1) + b2b_transform = Box2BoxTransformRotated(weights=weights) + src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) + dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) + + devices = [torch.device("cpu")] + if torch.cuda.is_available(): + devices.append(torch.device("cuda")) + for device in devices: + src_boxes = src_boxes.to(device=device) + dst_boxes = dst_boxes.to(device=device) + deltas = b2b_transform.get_deltas(src_boxes, dst_boxes) + dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) + assert torch.allclose(dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5) + # angle difference has to be normalized + assert torch.allclose( + (dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0) % 360.0 - 180.0, + torch.zeros_like(dst_boxes[:, 4]), + atol=1e-4, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/modeling/test_fast_rcnn.py b/src/sts/tests/modeling/test_fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..da34e84a468d2873edbfa8354807007a72c250ff --- /dev/null +++ b/src/sts/tests/modeling/test_fast_rcnn.py @@ -0,0 +1,173 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
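+#
+# NOTE: shape conventions assumed by the tracing tests below (inferred from
+# the tensors they feed in, not from upstream docs): with num_classes = 5,
+# FastRCNNOutputLayers produces a (scores, proposal_deltas) pair where scores
+# has shape (N, num_classes + 1) -- hence the (N, 6) inputs to predict_probs --
+# and proposal_deltas has shape (N, num_classes * 4) for class-specific box
+# regression -- hence the (N, 20) inputs to predict_boxes.
+#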
+import logging +import unittest +import torch + +from detectron2.layers import ShapeSpec +from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated +from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers +from detectron2.modeling.roi_heads.rotated_fast_rcnn import RotatedFastRCNNOutputLayers +from detectron2.structures import Boxes, Instances, RotatedBoxes +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.events import EventStorage + +logger = logging.getLogger(__name__) + + +class FastRCNNTest(unittest.TestCase): + def test_fast_rcnn(self): + torch.manual_seed(132) + + box_head_output_size = 8 + + box_predictor = FastRCNNOutputLayers( + ShapeSpec(channels=box_head_output_size), + box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), + num_classes=5, + ) + feature_pooled = torch.rand(2, box_head_output_size) + predictions = box_predictor(feature_pooled) + + proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32) + gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) + proposal = Instances((10, 10)) + proposal.proposal_boxes = Boxes(proposal_boxes) + proposal.gt_boxes = Boxes(gt_boxes) + proposal.gt_classes = torch.tensor([1, 2]) + + with EventStorage(): # capture events in a new storage to discard them + losses = box_predictor.losses(predictions, [proposal]) + + expected_losses = { + "loss_cls": torch.tensor(1.7951188087), + "loss_box_reg": torch.tensor(4.0357131958), + } + for name in expected_losses.keys(): + assert torch.allclose(losses[name], expected_losses[name]) + + def test_fast_rcnn_empty_batch(self, device="cpu"): + box_predictor = FastRCNNOutputLayers( + ShapeSpec(channels=10), + box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), + num_classes=8, + ).to(device=device) + + logits = torch.randn(0, 100, requires_grad=True, device=device) + deltas = torch.randn(0, 4, requires_grad=True, device=device) + losses = box_predictor.losses([logits, deltas], []) + for value in losses.values(): + self.assertTrue(torch.allclose(value, torch.zeros_like(value))) + sum(losses.values()).backward() + self.assertTrue(logits.grad is not None) + self.assertTrue(deltas.grad is not None) + + predictions, _ = box_predictor.inference([logits, deltas], []) + self.assertEqual(len(predictions), 0) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_fast_rcnn_empty_batch_cuda(self): + self.test_fast_rcnn_empty_batch(device=torch.device("cuda")) + + def test_fast_rcnn_rotated(self): + torch.manual_seed(132) + box_head_output_size = 8 + + box_predictor = RotatedFastRCNNOutputLayers( + ShapeSpec(channels=box_head_output_size), + box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)), + num_classes=5, + ) + feature_pooled = torch.rand(2, box_head_output_size) + predictions = box_predictor(feature_pooled) + proposal_boxes = torch.tensor( + [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32 + ) + gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) + proposal = Instances((10, 10)) + proposal.proposal_boxes = RotatedBoxes(proposal_boxes) + proposal.gt_boxes = RotatedBoxes(gt_boxes) + proposal.gt_classes = torch.tensor([1, 2]) + + with EventStorage(): # capture events in a new storage to discard them + losses = box_predictor.losses(predictions, [proposal]) + + # Note: the expected losses are slightly different even if + # the boxes are essentially the same as in the 
FastRCNNOutput test, because + # bbox_pred in FastRCNNOutputLayers have different Linear layers/initialization + # between the two cases. + expected_losses = { + "loss_cls": torch.tensor(1.7920907736), + "loss_box_reg": torch.tensor(4.0410838127), + } + for name in expected_losses.keys(): + assert torch.allclose(losses[name], expected_losses[name]) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_predict_boxes_tracing(self): + class Model(torch.nn.Module): + def __init__(self, output_layer): + super(Model, self).__init__() + self._output_layer = output_layer + + def forward(self, proposal_deltas, proposal_boxes): + instances = Instances((10, 10)) + instances.proposal_boxes = Boxes(proposal_boxes) + return self._output_layer.predict_boxes((None, proposal_deltas), [instances]) + + box_head_output_size = 8 + + box_predictor = FastRCNNOutputLayers( + ShapeSpec(channels=box_head_output_size), + box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), + num_classes=5, + ) + + model = Model(box_predictor) + + from detectron2.export.torchscript_patch import patch_builtin_len + + with torch.no_grad(), patch_builtin_len(): + func = torch.jit.trace(model, (torch.randn(10, 20), torch.randn(10, 4))) + + o = func(torch.randn(10, 20), torch.randn(10, 4)) + self.assertEqual(o[0].shape, (10, 20)) + o = func(torch.randn(5, 20), torch.randn(5, 4)) + self.assertEqual(o[0].shape, (5, 20)) + o = func(torch.randn(20, 20), torch.randn(20, 4)) + self.assertEqual(o[0].shape, (20, 20)) + + def test_predict_probs_tracing(self): + class Model(torch.nn.Module): + def __init__(self, output_layer): + super(Model, self).__init__() + self._output_layer = output_layer + + def forward(self, scores, proposal_boxes): + instances = Instances((10, 10)) + instances.proposal_boxes = Boxes(proposal_boxes) + return self._output_layer.predict_probs((scores, None), [instances]) + + box_head_output_size = 8 + + box_predictor = FastRCNNOutputLayers( + ShapeSpec(channels=box_head_output_size), + box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), + num_classes=5, + ) + + model = Model(box_predictor) + + from detectron2.export.torchscript_patch import patch_builtin_len + + with torch.no_grad(), patch_builtin_len(): + func = torch.jit.trace(model, (torch.randn(10, 6), torch.rand(10, 4))) + o = func(torch.randn(10, 6), torch.randn(10, 4)) + self.assertEqual(o[0].shape, (10, 6)) + o = func(torch.randn(5, 6), torch.randn(5, 4)) + self.assertEqual(o[0].shape, (5, 6)) + o = func(torch.randn(20, 6), torch.randn(20, 4)) + self.assertEqual(o[0].shape, (20, 6)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/modeling/test_matcher.py b/src/sts/tests/modeling/test_matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..6eb2db0c24b117337c431e9ef00a85a3bced71b9 --- /dev/null +++ b/src/sts/tests/modeling/test_matcher.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
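+#
+# NOTE: a short walk-through of the expected values in test_scriptability
+# below, assuming the default RPN settings IOU_THRESHOLDS = [0.3, 0.7] and
+# IOU_LABELS = [0, -1, 1]. The match_quality_matrix is (num_gt=3, num_pred=4)
+# and every prediction (column) is matched to its best ground truth (row):
+#
+#   per-column argmax  -> matches   [1, 1, 2, 0]
+#   per-column max IoU ->           0.30  0.65  0.25  0.60
+#   thresholded labels ->            -1    -1     0    -1
+#
+# allow_low_quality_matches=True then forces the best-matching prediction of
+# each ground truth (columns 1 and 3 here) to positive, which yields the
+# asserted match_labels [-1, 1, 0, 1].
+#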
+import unittest +from typing import List +import torch + +from detectron2.config import get_cfg +from detectron2.modeling.matcher import Matcher + + +class TestMatcher(unittest.TestCase): + def test_scriptability(self): + cfg = get_cfg() + anchor_matcher = Matcher( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True + ) + match_quality_matrix = torch.tensor( + [[0.15, 0.45, 0.2, 0.6], [0.3, 0.65, 0.05, 0.1], [0.05, 0.4, 0.25, 0.4]] + ) + expected_matches = torch.tensor([1, 1, 2, 0]) + expected_match_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8) + + matches, match_labels = anchor_matcher(match_quality_matrix) + self.assertTrue(torch.allclose(matches, expected_matches)) + self.assertTrue(torch.allclose(match_labels, expected_match_labels)) + + # nonzero_tuple must be import explicitly to let jit know what it is. + # https://github.com/pytorch/pytorch/issues/38964 + from detectron2.layers import nonzero_tuple # noqa F401 + + def f(thresholds: List[float], labels: List[int]): + return Matcher(thresholds, labels, allow_low_quality_matches=True) + + scripted_anchor_matcher = torch.jit.script(f)( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS + ) + matches, match_labels = scripted_anchor_matcher(match_quality_matrix) + self.assertTrue(torch.allclose(matches, expected_matches)) + self.assertTrue(torch.allclose(match_labels, expected_match_labels)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/modeling/test_mmdet.py b/src/sts/tests/modeling/test_mmdet.py new file mode 100644 index 0000000000000000000000000000000000000000..5f06f530cff85fea0227784d68d7c3018038cfa1 --- /dev/null +++ b/src/sts/tests/modeling/test_mmdet.py @@ -0,0 +1,185 @@ +import unittest + +from detectron2.layers import ShapeSpec +from detectron2.modeling.mmdet_wrapper import MMDetBackbone, MMDetDetector + +try: + import mmdet.models # noqa + + HAS_MMDET = True +except ImportError: + HAS_MMDET = False + + +@unittest.skipIf(not HAS_MMDET, "mmdet not available") +class TestMMDetWrapper(unittest.TestCase): + def test_backbone(self): + MMDetBackbone( + backbone=dict( + type="DetectoRS_ResNet", + conv_cfg=dict(type="ConvAWS"), + sac=dict(type="SAC", use_deform=True), + stage_with_sac=(False, True, True, True), + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict( + type="FPN", + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + ), + # skip pretrained model for tests + # pretrained_backbone="torchvision://resnet50", + output_shapes=[ShapeSpec(channels=256, stride=s) for s in [4, 8, 16, 32, 64]], + output_names=["p2", "p3", "p4", "p5", "p6"], + ) + + def test_detector(self): + # a basic R50 Mask R-CNN + MMDetDetector( + detector=dict( + type="MaskRCNN", + # skip pretrained model for tests + # pretrained="torchvision://resnet50", + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict( + type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5 + ), + rpn_head=dict( + type="RPNHead", + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + ), + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + 
target_stds=[1.0, 1.0, 1.0, 1.0], + ), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + roi_head=dict( + type="StandardRoIHead", + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2], + ), + reg_class_agnostic=False, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + type="FCNMaskHead", + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + ), + allowed_border=-1, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + score_thr=0.05, + nms=dict(type="nms", iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5, + ), + ), + ), + pixel_mean=[1, 2, 3], + pixel_std=[1, 2, 3], + ) diff --git a/src/sts/tests/modeling/test_model_e2e.py b/src/sts/tests/modeling/test_model_e2e.py new file mode 100644 index 0000000000000000000000000000000000000000..db9dde2d89fc6a77c9cdf869a9e7ecbb180e6ff4 --- /dev/null +++ b/src/sts/tests/modeling/test_model_e2e.py @@ -0,0 +1,211 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
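+# End-to-end smoke tests: Mask R-CNN and RetinaNet are built from config without weights, then exercised in training and inference on random and empty inputs, inf/NaN features, and autocast dtypes.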
+ + +import itertools +import numpy as np +import unittest +from contextlib import contextmanager +from copy import deepcopy +import torch + +from detectron2.structures import BitMasks, Boxes, ImageList, Instances +from detectron2.utils.events import EventStorage +from detectron2.utils.testing import get_model_no_weights + + +@contextmanager +def typecheck_hook(model, *, in_dtype=None, out_dtype=None): + """ + Check that the model must be called with the given input/output dtype + """ + if not isinstance(in_dtype, set): + in_dtype = {in_dtype} + if not isinstance(out_dtype, set): + out_dtype = {out_dtype} + + def flatten(x): + if isinstance(x, torch.Tensor): + return [x] + if isinstance(x, (list, tuple)): + return list(itertools.chain(*[flatten(t) for t in x])) + if isinstance(x, dict): + return flatten(list(x.values())) + return [] + + def hook(module, input, output): + if in_dtype is not None: + dtypes = {x.dtype for x in flatten(input)} + assert ( + dtypes == in_dtype + ), f"Expected input dtype of {type(module)} is {in_dtype}. Got {dtypes} instead!" + + if out_dtype is not None: + dtypes = {x.dtype for x in flatten(output)} + assert ( + dtypes == out_dtype + ), f"Expected output dtype of {type(module)} is {out_dtype}. Got {dtypes} instead!" + + with model.register_forward_hook(hook): + yield + + +def create_model_input(img, inst=None): + if inst is not None: + return {"image": img, "instances": inst} + else: + return {"image": img} + + +def get_empty_instance(h, w): + inst = Instances((h, w)) + inst.gt_boxes = Boxes(torch.rand(0, 4)) + inst.gt_classes = torch.tensor([]).to(dtype=torch.int64) + inst.gt_masks = BitMasks(torch.rand(0, h, w)) + return inst + + +def get_regular_bitmask_instances(h, w): + inst = Instances((h, w)) + inst.gt_boxes = Boxes(torch.rand(3, 4)) + inst.gt_boxes.tensor[:, 2:] += inst.gt_boxes.tensor[:, :2] + inst.gt_classes = torch.tensor([3, 4, 5]).to(dtype=torch.int64) + inst.gt_masks = BitMasks((torch.rand(3, h, w) > 0.5)) + return inst + + +class ModelE2ETest: + def setUp(self): + torch.manual_seed(43) + self.model = get_model_no_weights(self.CONFIG_PATH) + + def _test_eval(self, input_sizes): + inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] + self.model.eval() + self.model(inputs) + + def _test_train(self, input_sizes, instances): + assert len(input_sizes) == len(instances) + inputs = [ + create_model_input(torch.rand(3, s[0], s[1]), inst) + for s, inst in zip(input_sizes, instances) + ] + self.model.train() + with EventStorage(): + losses = self.model(inputs) + sum(losses.values()).backward() + del losses + + def _inf_tensor(self, *shape): + return 1.0 / torch.zeros(*shape, device=self.model.device) + + def _nan_tensor(self, *shape): + return torch.zeros(*shape, device=self.model.device).fill_(float("nan")) + + def test_empty_data(self): + instances = [get_empty_instance(200, 250), get_empty_instance(200, 249)] + self._test_eval([(200, 250), (200, 249)]) + self._test_train([(200, 250), (200, 249)], instances) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_eval_tocpu(self): + model = deepcopy(self.model).cpu() + model.eval() + input_sizes = [(200, 250), (200, 249)] + inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] + model(inputs) + + +class MaskRCNNE2ETest(ModelE2ETest, unittest.TestCase): + CONFIG_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + def test_half_empty_data(self): + instances = [get_empty_instance(200, 250), 
get_regular_bitmask_instances(200, 249)] + self._test_train([(200, 250), (200, 249)], instances) + + # This test is flaky because in some environment the output features are zero due to relu + # def test_rpn_inf_nan_data(self): + # self.model.eval() + # for tensor in [self._inf_tensor, self._nan_tensor]: + # images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) + # features = { + # "p2": tensor(1, 256, 256, 256), + # "p3": tensor(1, 256, 128, 128), + # "p4": tensor(1, 256, 64, 64), + # "p5": tensor(1, 256, 32, 32), + # "p6": tensor(1, 256, 16, 16), + # } + # props, _ = self.model.proposal_generator(images, features) + # self.assertEqual(len(props[0]), 0) + + def test_roiheads_inf_nan_data(self): + self.model.eval() + for tensor in [self._inf_tensor, self._nan_tensor]: + images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) + features = { + "p2": tensor(1, 256, 256, 256), + "p3": tensor(1, 256, 128, 128), + "p4": tensor(1, 256, 64, 64), + "p5": tensor(1, 256, 32, 32), + "p6": tensor(1, 256, 16, 16), + } + props = [Instances((510, 510))] + props[0].proposal_boxes = Boxes([[10, 10, 20, 20]]).to(device=self.model.device) + props[0].objectness_logits = torch.tensor([1.0]).reshape(1, 1) + det, _ = self.model.roi_heads(images, features, props) + self.assertEqual(len(det[0]), 0) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_autocast(self): + from torch.cuda.amp import autocast + + inputs = [{"image": torch.rand(3, 100, 100)}] + self.model.eval() + with autocast(), typecheck_hook( + self.model.backbone, in_dtype=torch.float32, out_dtype=torch.float16 + ), typecheck_hook( + self.model.roi_heads.box_predictor, in_dtype=torch.float16, out_dtype=torch.float16 + ): + out = self.model.inference(inputs, do_postprocess=False)[0] + self.assertEqual(out.pred_boxes.tensor.dtype, torch.float32) + self.assertEqual(out.pred_masks.dtype, torch.float16) + self.assertEqual(out.scores.dtype, torch.float32) # scores comes from softmax + + +class RetinaNetE2ETest(ModelE2ETest, unittest.TestCase): + CONFIG_PATH = "COCO-Detection/retinanet_R_50_FPN_1x.yaml" + + def test_inf_nan_data(self): + self.model.eval() + self.model.score_threshold = -999999999 + for tensor in [self._inf_tensor, self._nan_tensor]: + images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) + features = [ + tensor(1, 256, 128, 128), + tensor(1, 256, 64, 64), + tensor(1, 256, 32, 32), + tensor(1, 256, 16, 16), + tensor(1, 256, 8, 8), + ] + anchors = self.model.anchor_generator(features) + _, pred_anchor_deltas = self.model.head(features) + HWAs = [np.prod(x.shape[-3:]) // 4 for x in pred_anchor_deltas] + + pred_logits = [tensor(1, HWA, self.model.num_classes) for HWA in HWAs] + pred_anchor_deltas = [tensor(1, HWA, 4) for HWA in HWAs] + det = self.model.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes) + # all predictions (if any) are infinite or nan + if len(det[0]): + self.assertTrue(torch.isfinite(det[0].pred_boxes.tensor).sum() == 0) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_autocast(self): + from torch.cuda.amp import autocast + + inputs = [{"image": torch.rand(3, 100, 100)}] + self.model.eval() + with autocast(), typecheck_hook( + self.model.backbone, in_dtype=torch.float32, out_dtype=torch.float16 + ), typecheck_hook(self.model.head, in_dtype=torch.float16, out_dtype=torch.float16): + out = self.model(inputs)[0]["instances"] + self.assertEqual(out.pred_boxes.tensor.dtype, torch.float32) + self.assertEqual(out.scores.dtype, 
torch.float16) diff --git a/src/sts/tests/modeling/test_roi_heads.py b/src/sts/tests/modeling/test_roi_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..e1d986a0ee3b884a161b5938cedbe144057d5ece --- /dev/null +++ b/src/sts/tests/modeling/test_roi_heads.py @@ -0,0 +1,329 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import unittest +from copy import deepcopy +import torch +from torch import nn + +from detectron2 import model_zoo +from detectron2.config import get_cfg +from detectron2.export.torchscript_patch import ( + freeze_training_mode, + patch_builtin_len, + patch_instances, +) +from detectron2.layers import ShapeSpec +from detectron2.modeling.proposal_generator.build import build_proposal_generator +from detectron2.modeling.roi_heads import ( + FastRCNNConvFCHead, + KRCNNConvDeconvUpsampleHead, + MaskRCNNConvUpsampleHead, + StandardROIHeads, + build_roi_heads, +) +from detectron2.projects import point_rend +from detectron2.structures import BitMasks, Boxes, ImageList, Instances, RotatedBoxes +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.events import EventStorage +from detectron2.utils.testing import assert_instances_allclose, random_boxes + +logger = logging.getLogger(__name__) + +""" +Make sure the losses of ROIHeads/RPN do not change, to avoid +breaking the forward logic by mistake. +This relies on assumption that pytorch's RNG is stable. +""" + + +class ROIHeadsTest(unittest.TestCase): + def test_roi_heads(self): + torch.manual_seed(121) + cfg = get_cfg() + cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" + cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 + cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" + cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) + cfg.MODEL.MASK_ON = True + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} + + image_shape = (15, 15) + gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) + gt_instance0 = Instances(image_shape) + gt_instance0.gt_boxes = Boxes(gt_boxes0) + gt_instance0.gt_classes = torch.tensor([2, 1]) + gt_instance0.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5) + gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) + gt_instance1 = Instances(image_shape) + gt_instance1.gt_boxes = Boxes(gt_boxes1) + gt_instance1.gt_classes = torch.tensor([1, 2]) + gt_instance1.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5) + gt_instances = [gt_instance0, gt_instance1] + + proposal_generator = build_proposal_generator(cfg, feature_shape) + roi_heads = StandardROIHeads(cfg, feature_shape) + + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator(images, features, gt_instances) + _, detector_losses = roi_heads(images, features, proposals, gt_instances) + + detector_losses.update(proposal_losses) + expected_losses = { + "loss_cls": 4.5253729820251465, + "loss_box_reg": 0.009785720147192478, + "loss_mask": 0.693184494972229, + "loss_rpn_cls": 0.08186662942171097, + "loss_rpn_loc": 0.1104838103055954, + } + succ = all( + torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0))) + for name in detector_losses.keys() + ) + self.assertTrue( + succ, + "Losses has changed! 
New losses: {}".format( + {k: v.item() for k, v in detector_losses.items()} + ), + ) + + def test_rroi_heads(self): + torch.manual_seed(121) + cfg = get_cfg() + cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" + cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" + cfg.MODEL.ROI_HEADS.NAME = "RROIHeads" + cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" + cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 + cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) + cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" + cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated" + cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} + + image_shape = (15, 15) + gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32) + gt_instance0 = Instances(image_shape) + gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0) + gt_instance0.gt_classes = torch.tensor([2, 1]) + gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32) + gt_instance1 = Instances(image_shape) + gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1) + gt_instance1.gt_classes = torch.tensor([1, 2]) + gt_instances = [gt_instance0, gt_instance1] + + proposal_generator = build_proposal_generator(cfg, feature_shape) + roi_heads = build_roi_heads(cfg, feature_shape) + + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator(images, features, gt_instances) + _, detector_losses = roi_heads(images, features, proposals, gt_instances) + + detector_losses.update(proposal_losses) + expected_losses = { + "loss_cls": 4.365657806396484, + "loss_box_reg": 0.0015851043863222003, + "loss_rpn_cls": 0.2427729219198227, + "loss_rpn_loc": 0.3646621108055115, + } + succ = all( + torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0))) + for name in detector_losses.keys() + ) + self.assertTrue( + succ, + "Losses has changed! 
New losses: {}".format( + {k: v.item() for k, v in detector_losses.items()} + ), + ) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_box_head_scriptability(self): + input_shape = ShapeSpec(channels=1024, height=14, width=14) + box_features = torch.randn(4, 1024, 14, 14) + + box_head = FastRCNNConvFCHead( + input_shape, conv_dims=[512, 512], fc_dims=[1024, 1024] + ).eval() + script_box_head = torch.jit.script(box_head) + + origin_output = box_head(box_features) + script_output = script_box_head(box_features) + self.assertTrue(torch.equal(origin_output, script_output)) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_mask_head_scriptability(self): + input_shape = ShapeSpec(channels=1024) + mask_features = torch.randn(4, 1024, 14, 14) + + image_shapes = [(10, 10), (15, 15)] + pred_instance0 = Instances(image_shapes[0]) + pred_classes0 = torch.tensor([1, 2, 3], dtype=torch.int64) + pred_instance0.pred_classes = pred_classes0 + pred_instance1 = Instances(image_shapes[1]) + pred_classes1 = torch.tensor([4], dtype=torch.int64) + pred_instance1.pred_classes = pred_classes1 + + mask_head = MaskRCNNConvUpsampleHead( + input_shape, num_classes=80, conv_dims=[256, 256] + ).eval() + # pred_instance will be in-place changed during the inference + # process of `MaskRCNNConvUpsampleHead` + origin_outputs = mask_head(mask_features, deepcopy([pred_instance0, pred_instance1])) + + fields = {"pred_masks": torch.Tensor, "pred_classes": torch.Tensor} + with freeze_training_mode(mask_head), patch_instances(fields) as NewInstances: + sciript_mask_head = torch.jit.script(mask_head) + pred_instance0 = NewInstances.from_instances(pred_instance0) + pred_instance1 = NewInstances.from_instances(pred_instance1) + script_outputs = sciript_mask_head(mask_features, [pred_instance0, pred_instance1]) + + for origin_ins, script_ins in zip(origin_outputs, script_outputs): + assert_instances_allclose(origin_ins, script_ins, rtol=0) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_keypoint_head_scriptability(self): + input_shape = ShapeSpec(channels=1024, height=14, width=14) + keypoint_features = torch.randn(4, 1024, 14, 14) + + image_shapes = [(10, 10), (15, 15)] + pred_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6], [1, 5, 2, 8]], dtype=torch.float32) + pred_instance0 = Instances(image_shapes[0]) + pred_instance0.pred_boxes = Boxes(pred_boxes0) + pred_boxes1 = torch.tensor([[7, 3, 10, 5]], dtype=torch.float32) + pred_instance1 = Instances(image_shapes[1]) + pred_instance1.pred_boxes = Boxes(pred_boxes1) + + keypoint_head = KRCNNConvDeconvUpsampleHead( + input_shape, num_keypoints=17, conv_dims=[512, 512] + ).eval() + origin_outputs = keypoint_head( + keypoint_features, deepcopy([pred_instance0, pred_instance1]) + ) + + fields = { + "pred_boxes": Boxes, + "pred_keypoints": torch.Tensor, + "pred_keypoint_heatmaps": torch.Tensor, + } + with freeze_training_mode(keypoint_head), patch_instances(fields) as NewInstances: + sciript_keypoint_head = torch.jit.script(keypoint_head) + pred_instance0 = NewInstances.from_instances(pred_instance0) + pred_instance1 = NewInstances.from_instances(pred_instance1) + script_outputs = sciript_keypoint_head( + keypoint_features, [pred_instance0, pred_instance1] + ) + + for origin_ins, script_ins in zip(origin_outputs, script_outputs): + assert_instances_allclose(origin_ins, script_ins, rtol=0) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def 
test_StandardROIHeads_scriptability(self): + cfg = get_cfg() + cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" + cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 + cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" + cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) + cfg.MODEL.MASK_ON = True + cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.01 + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} + + roi_heads = StandardROIHeads(cfg, feature_shape).eval() + + proposal0 = Instances(image_sizes[0]) + proposal_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) + proposal0.proposal_boxes = Boxes(proposal_boxes0) + proposal0.objectness_logits = torch.tensor([0.5, 0.7], dtype=torch.float32) + + proposal1 = Instances(image_sizes[1]) + proposal_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) + proposal1.proposal_boxes = Boxes(proposal_boxes1) + proposal1.objectness_logits = torch.tensor([0.1, 0.9], dtype=torch.float32) + proposals = [proposal0, proposal1] + + pred_instances, _ = roi_heads(images, features, proposals) + fields = { + "objectness_logits": torch.Tensor, + "proposal_boxes": Boxes, + "pred_classes": torch.Tensor, + "scores": torch.Tensor, + "pred_masks": torch.Tensor, + "pred_boxes": Boxes, + "pred_keypoints": torch.Tensor, + "pred_keypoint_heatmaps": torch.Tensor, + } + with freeze_training_mode(roi_heads), patch_instances(fields) as new_instances: + proposal0 = new_instances.from_instances(proposal0) + proposal1 = new_instances.from_instances(proposal1) + proposals = [proposal0, proposal1] + scripted_rot_heads = torch.jit.script(roi_heads) + scripted_pred_instances, _ = scripted_rot_heads(images, features, proposals) + + for instance, scripted_instance in zip(pred_instances, scripted_pred_instances): + assert_instances_allclose(instance, scripted_instance, rtol=0) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_PointRend_mask_head_tracing(self): + cfg = model_zoo.get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml") + point_rend.add_pointrend_config(cfg) + cfg.MODEL.ROI_HEADS.IN_FEATURES = ["p2", "p3"] + cfg.MODEL.ROI_MASK_HEAD.NAME = "PointRendMaskHead" + cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "" + cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = True + chan = 256 + head = point_rend.PointRendMaskHead( + cfg, + { + "p2": ShapeSpec(channels=chan, stride=4), + "p3": ShapeSpec(channels=chan, stride=8), + }, + ) + + def gen_inputs(h, w, N): + p2 = torch.rand(1, chan, h, w) + p3 = torch.rand(1, chan, h // 2, w // 2) + boxes = random_boxes(N, max_coord=h) + return p2, p3, boxes + + class Wrap(nn.ModuleDict): + def forward(self, p2, p3, boxes): + features = { + "p2": p2, + "p3": p3, + } + inst = Instances((p2.shape[2] * 4, p2.shape[3] * 4)) + inst.pred_boxes = Boxes(boxes) + inst.pred_classes = torch.zeros(inst.__len__(), dtype=torch.long) + out = self.head(features, [inst])[0] + return out.pred_masks + + model = Wrap({"head": head}) + model.eval() + with torch.no_grad(), patch_builtin_len(): + traced = torch.jit.trace(model, gen_inputs(302, 208, 20)) + inputs = gen_inputs(100, 120, 30) + out_eager = model(*inputs) + out_trace = traced(*inputs) + self.assertTrue(torch.allclose(out_eager, out_trace)) + + +if 
__name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/modeling/test_roi_pooler.py b/src/sts/tests/modeling/test_roi_pooler.py new file mode 100644 index 0000000000000000000000000000000000000000..e9511ff09c6d8438b12ae413906bb74fdb8feac0 --- /dev/null +++ b/src/sts/tests/modeling/test_roi_pooler.py @@ -0,0 +1,180 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import unittest +import torch + +from detectron2.modeling.poolers import ROIPooler, _fmt_box_list +from detectron2.structures import Boxes, RotatedBoxes +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.testing import random_boxes + +logger = logging.getLogger(__name__) + + +class TestROIPooler(unittest.TestCase): + def _test_roialignv2_roialignrotated_match(self, device): + pooler_resolution = 14 + canonical_level = 4 + canonical_scale_factor = 2 ** canonical_level + pooler_scales = (1.0 / canonical_scale_factor,) + sampling_ratio = 0 + + N, C, H, W = 2, 4, 10, 8 + N_rois = 10 + std = 11 + mean = 0 + feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean + + features = [feature.to(device)] + + rois = [] + rois_rotated = [] + for _ in range(N): + boxes = random_boxes(N_rois, W * canonical_scale_factor) + rotated_boxes = torch.zeros(N_rois, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + rois.append(Boxes(boxes).to(device)) + rois_rotated.append(RotatedBoxes(rotated_boxes).to(device)) + + roialignv2_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type="ROIAlignV2", + ) + + roialignv2_out = roialignv2_pooler(features, rois) + + roialignrotated_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type="ROIAlignRotated", + ) + + roialignrotated_out = roialignrotated_pooler(features, rois_rotated) + + self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)) + + def test_roialignv2_roialignrotated_match_cpu(self): + self._test_roialignv2_roialignrotated_match(device="cpu") + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_roialignv2_roialignrotated_match_cuda(self): + self._test_roialignv2_roialignrotated_match(device="cuda") + + def _test_scriptability(self, device): + pooler_resolution = 14 + canonical_level = 4 + canonical_scale_factor = 2 ** canonical_level + pooler_scales = (1.0 / canonical_scale_factor,) + sampling_ratio = 0 + + N, C, H, W = 2, 4, 10, 8 + N_rois = 10 + std = 11 + mean = 0 + feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean + + features = [feature.to(device)] + + rois = [] + for _ in range(N): + boxes = random_boxes(N_rois, W * canonical_scale_factor) + + rois.append(Boxes(boxes).to(device)) + + roialignv2_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type="ROIAlignV2", + ) + + roialignv2_out = roialignv2_pooler(features, rois) + scripted_roialignv2_out = torch.jit.script(roialignv2_pooler)(features, rois) + self.assertTrue(torch.equal(roialignv2_out, scripted_roialignv2_out)) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_scriptability_cpu(self): + self._test_scriptability(device="cpu") + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + 
@unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_scriptability_gpu(self): + self._test_scriptability(device="cuda") + + def test_no_images(self): + N, C, H, W = 0, 32, 32, 32 + feature = torch.rand(N, C, H, W) - 0.5 + features = [feature] + pooler = ROIPooler( + output_size=14, scales=(1.0,), sampling_ratio=0.0, pooler_type="ROIAlignV2" + ) + output = pooler.forward(features, []) + self.assertEqual(output.shape, (0, C, 14, 14)) + + def test_fmt_box_list_tracing(self): + class Model(torch.nn.Module): + def forward(self, box_tensor): + return _fmt_box_list(box_tensor, 0) + + with torch.no_grad(): + func = torch.jit.trace(Model(), torch.ones(10, 4)) + + self.assertEqual(func(torch.ones(10, 4)).shape, (10, 5)) + self.assertEqual(func(torch.ones(5, 4)).shape, (5, 5)) + self.assertEqual(func(torch.ones(20, 4)).shape, (20, 5)) + + def test_roi_pooler_tracing(self): + class Model(torch.nn.Module): + def __init__(self, roi): + super(Model, self).__init__() + self.roi = roi + + def forward(self, x, boxes): + return self.roi(x, [Boxes(boxes)]) + + pooler_resolution = 14 + canonical_level = 4 + canonical_scale_factor = 2 ** canonical_level + pooler_scales = (1.0 / canonical_scale_factor, 0.5 / canonical_scale_factor) + sampling_ratio = 0 + + N, C, H, W = 1, 4, 10, 8 + N_rois = 10 + std = 11 + mean = 0 + feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean + feature = [feature, feature] + + rois = random_boxes(N_rois, W * canonical_scale_factor) + # Add one larger box so that this level has only one box. + # This may trigger the bug https://github.com/pytorch/pytorch/issues/49852 + # that we shall workaround. + rois = torch.cat([rois, torch.tensor([[0, 0, 448, 448]])]) + + model = Model( + ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type="ROIAlign", + ) + ) + + with torch.no_grad(): + func = torch.jit.trace(model, (feature, rois)) + o = func(feature, rois) + self.assertEqual(o.shape, (11, 4, 14, 14)) + o = func(feature, rois[:5]) + self.assertEqual(o.shape, (5, 4, 14, 14)) + o = func(feature, random_boxes(20, W * canonical_scale_factor)) + self.assertEqual(o.shape, (20, 4, 14, 14)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/modeling/test_rpn.py b/src/sts/tests/modeling/test_rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..d65cda5c452d76b6f0f59279dab58fcf8a404a11 --- /dev/null +++ b/src/sts/tests/modeling/test_rpn.py @@ -0,0 +1,238 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
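+# Tests for the RPN and RRPN proposal generators: fixed-seed loss regression values, configurable conv dims, TorchScript scripting, and find_top_rpn_proposals with inf logits and under tracing.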
+import logging +import sys +import unittest +import torch + +from detectron2.config import get_cfg +from detectron2.export import scripting_with_instances +from detectron2.layers import ShapeSpec +from detectron2.modeling.backbone import build_backbone +from detectron2.modeling.proposal_generator import RPN, build_proposal_generator +from detectron2.modeling.proposal_generator.proposal_utils import find_top_rpn_proposals +from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.events import EventStorage + +logger = logging.getLogger(__name__) + + +class RPNTest(unittest.TestCase): + def get_gt_and_features(self): + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + image_shape = (15, 15) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) + gt_instances = Instances(image_shape) + gt_instances.gt_boxes = Boxes(gt_boxes) + return (gt_instances, features, images, image_sizes) + + def test_rpn(self): + torch.manual_seed(121) + cfg = get_cfg() + backbone = build_backbone(cfg) + proposal_generator = RPN(cfg, backbone.output_shape()) + (gt_instances, features, images, image_sizes) = self.get_gt_and_features() + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator( + images, features, [gt_instances[0], gt_instances[1]] + ) + + expected_losses = { + "loss_rpn_cls": torch.tensor(0.08011703193), + "loss_rpn_loc": torch.tensor(0.101470276), + } + for name in expected_losses.keys(): + err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( + name, proposal_losses[name], expected_losses[name] + ) + self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) + + self.assertEqual(len(proposals), len(image_sizes)) + for proposal, im_size in zip(proposals, image_sizes): + self.assertEqual(proposal.image_size, im_size) + + expected_proposal_box = torch.tensor([[0, 0, 10, 10], [7.2702, 0, 10, 10]]) + expected_objectness_logit = torch.tensor([0.1596, -0.0007]) + self.assertTrue( + torch.allclose(proposals[0].proposal_boxes.tensor, expected_proposal_box, atol=1e-4) + ) + self.assertTrue( + torch.allclose(proposals[0].objectness_logits, expected_objectness_logit, atol=1e-4) + ) + + def verify_rpn(self, conv_dims, expected_conv_dims): + torch.manual_seed(121) + cfg = get_cfg() + cfg.MODEL.RPN.CONV_DIMS = conv_dims + backbone = build_backbone(cfg) + proposal_generator = RPN(cfg, backbone.output_shape()) + for k, conv in enumerate(proposal_generator.rpn_head.conv): + self.assertEqual(expected_conv_dims[k], conv.out_channels) + return proposal_generator + + def test_rpn_larger_num_convs(self): + conv_dims = [64, 64, 64, 64, 64] + proposal_generator = self.verify_rpn(conv_dims, conv_dims) + (gt_instances, features, images, image_sizes) = self.get_gt_and_features() + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator( + images, features, [gt_instances[0], gt_instances[1]] + ) + expected_losses = { + "loss_rpn_cls": torch.tensor(0.08122821152), + "loss_rpn_loc": torch.tensor(0.10064548254), + } + for name in expected_losses.keys(): + err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( + name, proposal_losses[name], 
expected_losses[name] + ) + self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) + + def test_rpn_conv_dims_not_set(self): + conv_dims = [-1, -1, -1] + expected_conv_dims = [1024, 1024, 1024] + self.verify_rpn(conv_dims, expected_conv_dims) + + # https://github.com/pytorch/pytorch/issues/46964 + @unittest.skipIf( + TORCH_VERSION < (1, 7) or sys.version_info.minor <= 6, "Insufficient pytorch version" + ) + def test_rpn_scriptability(self): + cfg = get_cfg() + proposal_generator = RPN(cfg, {"res4": ShapeSpec(channels=1024, stride=16)}).eval() + num_images = 2 + images_tensor = torch.rand(num_images, 30, 40) + image_sizes = [(32, 32), (30, 40)] + images = ImageList(images_tensor, image_sizes) + features = {"res4": torch.rand(num_images, 1024, 1, 2)} + + fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} + proposal_generator_ts = scripting_with_instances(proposal_generator, fields) + + proposals, _ = proposal_generator(images, features) + proposals_ts, _ = proposal_generator_ts(images, features) + + for proposal, proposal_ts in zip(proposals, proposals_ts): + self.assertEqual(proposal.image_size, proposal_ts.image_size) + self.assertTrue( + torch.equal(proposal.proposal_boxes.tensor, proposal_ts.proposal_boxes.tensor) + ) + self.assertTrue(torch.equal(proposal.objectness_logits, proposal_ts.objectness_logits)) + + def test_rrpn(self): + torch.manual_seed(121) + cfg = get_cfg() + cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" + cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]] + cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]] + cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) + cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" + backbone = build_backbone(cfg) + proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + image_shape = (15, 15) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) + gt_instances = Instances(image_shape) + gt_instances.gt_boxes = RotatedBoxes(gt_boxes) + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator( + images, features, [gt_instances[0], gt_instances[1]] + ) + + expected_losses = { + "loss_rpn_cls": torch.tensor(0.04291602224), + "loss_rpn_loc": torch.tensor(0.145077362), + } + for name in expected_losses.keys(): + err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( + name, proposal_losses[name], expected_losses[name] + ) + self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) + + expected_proposal_box = torch.tensor( + [ + [-1.77999556, 0.78155339, 68.04367828, 14.78156471, 60.59333801], + [13.82740974, -1.50282836, 34.67269897, 29.19676590, -3.81942749], + [8.10392570, -0.99071521, 145.39100647, 32.13126373, 3.67242432], + [5.00000000, 4.57370186, 10.00000000, 9.14740372, 0.89196777], + ] + ) + + expected_objectness_logit = torch.tensor([0.10924313, 0.09881870, 0.07649877, 0.05858029]) + + torch.set_printoptions(precision=8, sci_mode=False) + + self.assertEqual(len(proposals), len(image_sizes)) + + proposal = proposals[0] + # It seems that there's some randomness in the result across different machines: + # 
This test can be run on a local machine for 100 times with exactly the same result, + # However, a different machine might produce slightly different results, + # thus the atol here. + err_msg = "computed proposal boxes = {}, expected {}".format( + proposal.proposal_boxes.tensor, expected_proposal_box + ) + self.assertTrue( + torch.allclose(proposal.proposal_boxes.tensor[:4], expected_proposal_box, atol=1e-5), + err_msg, + ) + + err_msg = "computed objectness logits = {}, expected {}".format( + proposal.objectness_logits, expected_objectness_logit + ) + self.assertTrue( + torch.allclose(proposal.objectness_logits[:4], expected_objectness_logit, atol=1e-5), + err_msg, + ) + + def test_find_rpn_proposals_inf(self): + N, Hi, Wi, A = 3, 3, 3, 3 + proposals = [torch.rand(N, Hi * Wi * A, 4)] + pred_logits = [torch.rand(N, Hi * Wi * A)] + pred_logits[0][1][3:5].fill_(float("inf")) + find_top_rpn_proposals(proposals, pred_logits, [(10, 10)], 0.5, 1000, 1000, 0, False) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_find_rpn_proposals_tracing(self): + N, Hi, Wi, A = 3, 50, 50, 9 + proposal = torch.rand(N, Hi * Wi * A, 4) + pred_logit = torch.rand(N, Hi * Wi * A) + + def func(proposal, logit, image_size): + r = find_top_rpn_proposals( + [proposal], [logit], [image_size], 0.7, 1000, 1000, 0, False + )[0] + size = r.image_size + if not isinstance(size, torch.Tensor): + size = torch.tensor(size) + return (size, r.proposal_boxes.tensor, r.objectness_logits) + + other_inputs = [] + # test that it generalizes to other shapes + for Hi, Wi, shp in [(30, 30, 60), (10, 10, 800)]: + other_inputs.append( + ( + torch.rand(N, Hi * Wi * A, 4), + torch.rand(N, Hi * Wi * A), + torch.tensor([shp, shp]), + ) + ) + torch.jit.trace( + func, (proposal, pred_logit, torch.tensor([100, 100])), check_inputs=other_inputs + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/structures/__init__.py b/src/sts/tests/structures/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/sts/tests/structures/test_boxes.py b/src/sts/tests/structures/test_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..eb053479460bc18a754f839acc07dd120dd28553 --- /dev/null +++ b/src/sts/tests/structures/test_boxes.py @@ -0,0 +1,225 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
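+# Tests for BoxMode conversions on lists, numpy arrays, and CPU/CUDA tensors, JSON round-trips of BoxMode, pairwise IoU/IoA of axis-aligned Boxes, and Boxes scriptability.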
+import json +import math +import numpy as np +import unittest +import torch + +from detectron2.structures import Boxes, BoxMode, pairwise_ioa, pairwise_iou +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.testing import reload_script_model + + +class TestBoxMode(unittest.TestCase): + def _convert_xy_to_wh(self, x): + return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + + def _convert_xywha_to_xyxy(self, x): + return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS) + + def _convert_xywh_to_xywha(self, x): + return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) + + def test_convert_int_mode(self): + BoxMode.convert([1, 2, 3, 4], 0, 1) + + def test_box_convert_list(self): + for tp in [list, tuple]: + box = tp([5.0, 5.0, 10.0, 10.0]) + output = self._convert_xy_to_wh(box) + self.assertIsInstance(output, tp) + self.assertIsInstance(output[0], float) + self.assertEqual(output, tp([5.0, 5.0, 5.0, 5.0])) + + with self.assertRaises(Exception): + self._convert_xy_to_wh([box]) + + def test_box_convert_array(self): + box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]]) + output = self._convert_xy_to_wh(box) + self.assertEqual(output.dtype, box.dtype) + self.assertEqual(output.shape, box.shape) + self.assertTrue((output[0] == [5, 5, 5, 5]).all()) + self.assertTrue((output[1] == [1, 1, 1, 2]).all()) + + def test_box_convert_cpu_tensor(self): + box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) + output = self._convert_xy_to_wh(box) + self.assertEqual(output.dtype, box.dtype) + self.assertEqual(output.shape, box.shape) + output = output.numpy() + self.assertTrue((output[0] == [5, 5, 5, 5]).all()) + self.assertTrue((output[1] == [1, 1, 1, 2]).all()) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_box_convert_cuda_tensor(self): + box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]).cuda() + output = self._convert_xy_to_wh(box) + self.assertEqual(output.dtype, box.dtype) + self.assertEqual(output.shape, box.shape) + self.assertEqual(output.device, box.device) + output = output.cpu().numpy() + self.assertTrue((output[0] == [5, 5, 5, 5]).all()) + self.assertTrue((output[1] == [1, 1, 1, 2]).all()) + + def test_box_convert_xywha_to_xyxy_list(self): + for tp in [list, tuple]: + box = tp([50, 50, 30, 20, 0]) + output = self._convert_xywha_to_xyxy(box) + self.assertIsInstance(output, tp) + self.assertEqual(output, tp([35, 40, 65, 60])) + + with self.assertRaises(Exception): + self._convert_xywha_to_xyxy([box]) + + def test_box_convert_xywha_to_xyxy_array(self): + for dtype in [np.float64, np.float32]: + box = np.asarray( + [ + [50, 50, 30, 20, 0], + [50, 50, 30, 20, 90], + [1, 1, math.sqrt(2), math.sqrt(2), -45], + ], + dtype=dtype, + ) + output = self._convert_xywha_to_xyxy(box) + self.assertEqual(output.dtype, box.dtype) + expected = np.asarray([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) + self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def test_box_convert_xywha_to_xyxy_tensor(self): + for dtype in [torch.float32, torch.float64]: + box = torch.tensor( + [ + [50, 50, 30, 20, 0], + [50, 50, 30, 20, 90], + [1, 1, math.sqrt(2), math.sqrt(2), -45], + ], + dtype=dtype, + ) + output = self._convert_xywha_to_xyxy(box) + self.assertEqual(output.dtype, box.dtype) + expected = torch.tensor([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) + + self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def 
test_box_convert_xywh_to_xywha_list(self): + for tp in [list, tuple]: + box = tp([50, 50, 30, 20]) + output = self._convert_xywh_to_xywha(box) + self.assertIsInstance(output, tp) + self.assertEqual(output, tp([65, 60, 30, 20, 0])) + + with self.assertRaises(Exception): + self._convert_xywh_to_xywha([box]) + + def test_box_convert_xywh_to_xywha_array(self): + for dtype in [np.float64, np.float32]: + box = np.asarray([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) + output = self._convert_xywh_to_xywha(box) + self.assertEqual(output.dtype, box.dtype) + expected = np.asarray( + [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype + ) + self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def test_box_convert_xywh_to_xywha_tensor(self): + for dtype in [torch.float32, torch.float64]: + box = torch.tensor([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) + output = self._convert_xywh_to_xywha(box) + self.assertEqual(output.dtype, box.dtype) + expected = torch.tensor( + [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype + ) + + self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def test_json_serializable(self): + payload = {"box_mode": BoxMode.XYWH_REL} + try: + json.dumps(payload) + except Exception: + self.fail("JSON serialization failed") + + def test_json_deserializable(self): + payload = '{"box_mode": 2}' + obj = json.loads(payload) + try: + obj["box_mode"] = BoxMode(obj["box_mode"]) + except Exception: + self.fail("JSON deserialization failed") + + +class TestBoxIOU(unittest.TestCase): + def create_boxes(self): + boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) + + boxes2 = torch.tensor( + [ + [0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 0.5, 1.0], + [0.0, 0.0, 1.0, 0.5], + [0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 1.0], + [0.5, 0.5, 1.5, 1.5], + ] + ) + return boxes1, boxes2 + + def test_pairwise_iou(self): + boxes1, boxes2 = self.create_boxes() + expected_ious = torch.tensor( + [ + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + ] + ) + + ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_ioa(self): + boxes1, boxes2 = self.create_boxes() + expected_ioas = torch.tensor( + [[1.0, 1.0, 1.0, 1.0, 1.0, 0.25], [1.0, 1.0, 1.0, 1.0, 1.0, 0.25]] + ) + ioas = pairwise_ioa(Boxes(boxes1), Boxes(boxes2)) + self.assertTrue(torch.allclose(ioas, expected_ioas)) + + +class TestBoxes(unittest.TestCase): + def test_empty_cat(self): + x = Boxes.cat([]) + self.assertTrue(x.tensor.shape, (0, 4)) + + def test_to(self): + x = Boxes(torch.rand(3, 4)) + self.assertEqual(x.to(device="cpu").tensor.device.type, "cpu") + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_scriptability(self): + def func(x): + boxes = Boxes(x) + test = boxes.to(torch.device("cpu")).tensor + return boxes.area(), test + + f = torch.jit.script(func) + f = reload_script_model(f) + f(torch.rand((3, 4))) + + data = torch.rand((3, 4)) + + def func_cat(x: torch.Tensor): + boxes1 = Boxes(x) + boxes2 = Boxes(x) + # boxes3 = Boxes.cat([boxes1, boxes2]) # this is not supported by torchsript for now. 
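+ # Workaround: calling cat through an instance scripts correctly, so use boxes1.cat(...) here instead of the classmethod.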
+ boxes3 = boxes1.cat([boxes1, boxes2]) + return boxes3 + + f = torch.jit.script(func_cat) + script_box = f(data) + self.assertTrue(torch.equal(torch.cat([data, data]), script_box.tensor)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/structures/test_imagelist.py b/src/sts/tests/structures/test_imagelist.py new file mode 100644 index 0000000000000000000000000000000000000000..d8616ee4d18c3682ab350c7df37195d91f6ff563 --- /dev/null +++ b/src/sts/tests/structures/test_imagelist.py @@ -0,0 +1,79 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import unittest +from typing import List, Sequence, Tuple +import torch + +from detectron2.structures import ImageList +from detectron2.utils.env import TORCH_VERSION + + +class TestImageList(unittest.TestCase): + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_imagelist_padding_tracing(self): + # test that the trace does not contain hard-coded constant sizes + def to_imagelist(tensors: Sequence[torch.Tensor]): + image_list = ImageList.from_tensors(tensors, 4) + return image_list.tensor, image_list.image_sizes + + def _tensor(*shape): + return torch.ones(shape, dtype=torch.float32) + + # test CHW (inputs needs padding vs. no padding) + for shape in [(3, 10, 10), (3, 12, 12)]: + func = torch.jit.trace(to_imagelist, ([_tensor(*shape)],)) + tensor, image_sizes = func([_tensor(3, 15, 20)]) + self.assertEqual(tensor.shape, (1, 3, 16, 20), tensor.shape) + self.assertEqual(image_sizes[0].tolist(), [15, 20], image_sizes[0]) + + # test HW + func = torch.jit.trace(to_imagelist, ([_tensor(10, 10)],)) + tensor, image_sizes = func([_tensor(15, 20)]) + self.assertEqual(tensor.shape, (1, 16, 20), tensor.shape) + self.assertEqual(image_sizes[0].tolist(), [15, 20], image_sizes[0]) + + # test 2x CHW + func = torch.jit.trace( + to_imagelist, + ([_tensor(3, 16, 10), _tensor(3, 13, 11)],), + ) + tensor, image_sizes = func([_tensor(3, 25, 20), _tensor(3, 10, 10)]) + self.assertEqual(tensor.shape, (2, 3, 28, 20), tensor.shape) + self.assertEqual(image_sizes[0].tolist(), [25, 20], image_sizes[0]) + self.assertEqual(image_sizes[1].tolist(), [10, 10], image_sizes[1]) + # support calling with different spatial sizes, but not with different #images + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_imagelist_scriptability(self): + image_nums = 2 + image_tensor = torch.randn((image_nums, 10, 20), dtype=torch.float32) + image_shape = [(10, 20)] * image_nums + + def f(image_tensor, image_shape: List[Tuple[int, int]]): + return ImageList(image_tensor, image_shape) + + ret = f(image_tensor, image_shape) + ret_script = torch.jit.script(f)(image_tensor, image_shape) + + self.assertEqual(len(ret), len(ret_script)) + for i in range(image_nums): + self.assertTrue(torch.equal(ret[i], ret_script[i])) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_imagelist_from_tensors_scriptability(self): + image_tensor_0 = torch.randn(10, 20, dtype=torch.float32) + image_tensor_1 = torch.randn(12, 22, dtype=torch.float32) + inputs = [image_tensor_0, image_tensor_1] + + def f(image_tensor: List[torch.Tensor]): + return ImageList.from_tensors(image_tensor, 10) + + ret = f(inputs) + ret_script = torch.jit.script(f)(inputs) + + self.assertEqual(len(ret), len(ret_script)) + self.assertTrue(torch.equal(ret.tensor, ret_script.tensor)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/structures/test_instances.py 
b/src/sts/tests/structures/test_instances.py new file mode 100644 index 0000000000000000000000000000000000000000..9f66bf6565c7138e8e2ba465713ccd2cab2bcaae --- /dev/null +++ b/src/sts/tests/structures/test_instances.py @@ -0,0 +1,192 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import unittest +import torch +from torch import Tensor + +from detectron2.export.torchscript import patch_instances +from detectron2.structures import Boxes, Instances +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.testing import convert_scripted_instances + + +class TestInstances(unittest.TestCase): + def test_int_indexing(self): + attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]]) + attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4]) + instances = Instances((100, 100)) + instances.attr1 = attr1 + instances.attr2 = attr2 + for i in range(-len(instances), len(instances)): + inst = instances[i] + self.assertEqual((inst.attr1 == attr1[i]).all(), True) + self.assertEqual((inst.attr2 == attr2[i]).all(), True) + + self.assertRaises(IndexError, lambda: instances[len(instances)]) + self.assertRaises(IndexError, lambda: instances[-len(instances) - 1]) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_script_new_fields(self): + def get_mask(x: Instances) -> torch.Tensor: + return x.mask + + class f(torch.nn.Module): + def forward(self, x: Instances): + proposal_boxes = x.proposal_boxes # noqa F841 + objectness_logits = x.objectness_logits # noqa F841 + return x + + class g(torch.nn.Module): + def forward(self, x: Instances): + return get_mask(x) + + class g2(torch.nn.Module): + def __init__(self): + super().__init__() + self.g = g() + + def forward(self, x: Instances): + proposal_boxes = x.proposal_boxes # noqa F841 + return x, self.g(x) + + fields = {"proposal_boxes": Boxes, "objectness_logits": Tensor} + with patch_instances(fields): + torch.jit.script(f()) + + # can't script anymore after exiting the context + with self.assertRaises(Exception): + # will create a ConcreteType for g + torch.jit.script(g2()) + + new_fields = {"mask": Tensor} + with patch_instances(new_fields): + # will compile g with a different Instances; this should pass + torch.jit.script(g()) + with self.assertRaises(Exception): + torch.jit.script(g2()) + + new_fields = {"mask": Tensor, "proposal_boxes": Boxes} + with patch_instances(new_fields) as NewInstances: + # get_mask will be compiled with a different Instances; this should pass + scripted_g2 = torch.jit.script(g2()) + x = NewInstances((3, 4)) + x.mask = torch.rand(3) + x.proposal_boxes = Boxes(torch.rand(3, 4)) + scripted_g2(x) # it should accept the new Instances object and run successfully + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_script_access_fields(self): + class f(torch.nn.Module): + def forward(self, x: Instances): + proposal_boxes = x.proposal_boxes + objectness_logits = x.objectness_logits + return proposal_boxes.tensor + objectness_logits + + fields = {"proposal_boxes": Boxes, "objectness_logits": Tensor} + with patch_instances(fields): + torch.jit.script(f()) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_script_len(self): + class f(torch.nn.Module): + def forward(self, x: Instances): + return len(x) + + class g(torch.nn.Module): + def forward(self, x: Instances): + return len(x) + + image_shape = (15, 15) + + fields = {"proposal_boxes": Boxes} + with patch_instances(fields) as new_instance: + 
script_module = torch.jit.script(f()) + x = new_instance(image_shape) + with self.assertRaises(Exception): + script_module(x) + box_tensors = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) + x.proposal_boxes = Boxes(box_tensors) + length = script_module(x) + self.assertEqual(length, 2) + + fields = {"objectness_logits": Tensor} + with patch_instances(fields) as new_instance: + script_module = torch.jit.script(g()) + x = new_instance(image_shape) + objectness_logits = torch.tensor([1.0]).reshape(1, 1) + x.objectness_logits = objectness_logits + length = script_module(x) + self.assertEqual(length, 1) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_script_has(self): + class f(torch.nn.Module): + def forward(self, x: Instances): + return x.has("proposal_boxes") + + image_shape = (15, 15) + fields = {"proposal_boxes": Boxes} + with patch_instances(fields) as new_instance: + script_module = torch.jit.script(f()) + x = new_instance(image_shape) + self.assertFalse(script_module(x)) + + box_tensors = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) + x.proposal_boxes = Boxes(box_tensors) + self.assertTrue(script_module(x)) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_script_to(self): + class f(torch.nn.Module): + def forward(self, x: Instances): + return x.to(torch.device("cpu")) + + image_shape = (15, 15) + fields = {"proposal_boxes": Boxes, "a": Tensor} + with patch_instances(fields) as new_instance: + script_module = torch.jit.script(f()) + x = new_instance(image_shape) + script_module(x) + + box_tensors = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) + x.proposal_boxes = Boxes(box_tensors) + x.a = box_tensors + script_module(x) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_script_getitem(self): + class f(torch.nn.Module): + def forward(self, x: Instances, idx): + return x[idx] + + image_shape = (15, 15) + fields = {"proposal_boxes": Boxes, "a": Tensor} + inst = Instances(image_shape) + inst.proposal_boxes = Boxes(torch.rand(4, 4)) + inst.a = torch.rand(4, 10) + idx = torch.tensor([True, False, True, False]) + with patch_instances(fields) as new_instance: + script_module = torch.jit.script(f()) + + out = f()(inst, idx) + out_scripted = script_module(new_instance.from_instances(inst), idx) + self.assertTrue( + torch.equal(out.proposal_boxes.tensor, out_scripted.proposal_boxes.tensor) + ) + self.assertTrue(torch.equal(out.a, out_scripted.a)) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_from_to_instances(self): + orig = Instances((30, 30)) + orig.proposal_boxes = Boxes(torch.rand(3, 4)) + + fields = {"proposal_boxes": Boxes, "a": Tensor} + with patch_instances(fields) as NewInstances: + # convert to NewInstances and back + new1 = NewInstances.from_instances(orig) + new2 = convert_scripted_instances(new1) + self.assertTrue(torch.equal(orig.proposal_boxes.tensor, new1.proposal_boxes.tensor)) + self.assertTrue(torch.equal(orig.proposal_boxes.tensor, new2.proposal_boxes.tensor)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/structures/test_masks.py b/src/sts/tests/structures/test_masks.py new file mode 100644 index 0000000000000000000000000000000000000000..819aaf575361b714a8d7fb177051c5b4f7ea64f2 --- /dev/null +++ b/src/sts/tests/structures/test_masks.py @@ -0,0 +1,43 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
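+# Tests BitMasks.get_bounding_boxes on hand-written masks and round-trips each box through polygons_to_bitmask and PolygonMasks.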
+import unittest +import torch + +from detectron2.structures.masks import BitMasks, PolygonMasks, polygons_to_bitmask + + +class TestBitMask(unittest.TestCase): + def test_get_bounding_box(self): + masks = torch.tensor( + [ + [ + [False, False, False, True], + [False, False, True, True], + [False, True, True, False], + [False, True, True, False], + ], + [ + [False, False, False, False], + [False, False, True, False], + [False, True, True, False], + [False, True, True, False], + ], + torch.zeros(4, 4), + ] + ) + bitmask = BitMasks(masks) + box_true = torch.tensor([[1, 0, 4, 4], [1, 1, 3, 4], [0, 0, 0, 0]], dtype=torch.float32) + box = bitmask.get_bounding_boxes() + self.assertTrue(torch.all(box.tensor == box_true).item()) + + for box in box_true: + poly = box[[0, 1, 2, 1, 2, 3, 0, 3]].numpy() + mask = polygons_to_bitmask([poly], 4, 4) + reconstruct_box = BitMasks(mask[None, :, :]).get_bounding_boxes()[0].tensor + self.assertTrue(torch.all(box == reconstruct_box).item()) + + reconstruct_box = PolygonMasks([[poly]]).get_bounding_boxes()[0].tensor + self.assertTrue(torch.all(box == reconstruct_box).item()) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/structures/test_rotated_boxes.py b/src/sts/tests/structures/test_rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..5cb32e7c01593d0378cefd6ae28e6144b33af059 --- /dev/null +++ b/src/sts/tests/structures/test_rotated_boxes.py @@ -0,0 +1,439 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +from __future__ import absolute_import, division, print_function, unicode_literals +import logging +import math +import random +import unittest +import torch +from fvcore.common.benchmark import benchmark + +from detectron2.layers.rotated_boxes import pairwise_iou_rotated +from detectron2.structures.boxes import Boxes +from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.testing import reload_script_model + +logger = logging.getLogger(__name__) + + +class TestRotatedBoxesLayer(unittest.TestCase): + def test_iou_0_dim_cpu(self): + boxes1 = torch.rand(0, 5, dtype=torch.float32) + boxes2 = torch.rand(10, 5, dtype=torch.float32) + expected_ious = torch.zeros(0, 10, dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious, expected_ious)) + + boxes1 = torch.rand(10, 5, dtype=torch.float32) + boxes2 = torch.rand(0, 5, dtype=torch.float32) + expected_ious = torch.zeros(10, 0, dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious, expected_ious)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_iou_0_dim_cuda(self): + boxes1 = torch.rand(0, 5, dtype=torch.float32) + boxes2 = torch.rand(10, 5, dtype=torch.float32) + expected_ious = torch.zeros(0, 10, dtype=torch.float32) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) + + boxes1 = torch.rand(10, 5, dtype=torch.float32) + boxes2 = torch.rand(0, 5, dtype=torch.float32) + expected_ious = torch.zeros(10, 0, dtype=torch.float32) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) + + def test_iou_half_overlap_cpu(self): + boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) + boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) + 
expected_ious = torch.tensor([[0.5]], dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious, expected_ious)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_iou_half_overlap_cuda(self): + boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) + boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) + expected_ious = torch.tensor([[0.5]], dtype=torch.float32) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) + + def test_iou_precision(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor([[565, 565, 10, 10.0, 0]], dtype=torch.float32, device=device) + boxes2 = torch.tensor([[565, 565, 10, 8.3, 0]], dtype=torch.float32, device=device) + iou = 8.3 / 10.0 + expected_ious = torch.tensor([[iou]], dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_iou_too_many_boxes_cuda(self): + s1, s2 = 5, 1289035 + boxes1 = torch.zeros(s1, 5) + boxes2 = torch.zeros(s2, 5) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTupleEqual(tuple(ious_cuda.shape), (s1, s2)) + + def test_iou_extreme(self): + # Cause floating point issues in cuda kernels (#1266) + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device) + boxes2 = torch.tensor( + [ + [ + -1.117407639806935e17, + 1.3858420478349148e18, + 1000.0000610351562, + 1000.0000610351562, + 1612.0, + ] + ], + device=device, + ) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(ious.min() >= 0, ious) + + def test_iou_issue_2154(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor( + [ + [ + 296.6620178222656, + 458.73883056640625, + 23.515729904174805, + 47.677001953125, + 0.08795166015625, + ] + ], + device=device, + ) + boxes2 = torch.tensor( + [[296.66201, 458.73882000000003, 23.51573, 47.67702, 0.087951]], + device=device, + ) + ious = pairwise_iou_rotated(boxes1, boxes2) + expected_ious = torch.tensor([[1.0]], dtype=torch.float32) + self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) + + def test_iou_issue_2167(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor( + [ + [ + 2563.74462890625000000000, + 1436.79016113281250000000, + 2174.70336914062500000000, + 214.09500122070312500000, + 115.11834716796875000000, + ] + ], + device=device, + ) + boxes2 = torch.tensor( + [ + [ + 2563.74462890625000000000, + 1436.79028320312500000000, + 2174.70288085937500000000, + 214.09495544433593750000, + 115.11835479736328125000, + ] + ], + device=device, + ) + ious = pairwise_iou_rotated(boxes1, boxes2) + expected_ious = torch.tensor([[1.0]], dtype=torch.float32) + self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) + + +class TestRotatedBoxesStructure(unittest.TestCase): + def test_clip_area_0_degree(self): + for _ in range(50): + num_boxes = 100 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 3] = 
torch.FloatTensor(num_boxes).uniform_(0, 500) + # Convert from (x_ctr, y_ctr, w, h, 0) to (x1, y1, x2, y2) + boxes_4d = torch.zeros(num_boxes, 4) + boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0 + boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0 + boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0 + boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0 + + image_size = (500, 600) + test_boxes_4d = Boxes(boxes_4d) + test_boxes_5d = RotatedBoxes(boxes_5d) + # Before clip + areas_4d = test_boxes_4d.area() + areas_5d = test_boxes_5d.area() + self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) + # After clip + test_boxes_4d.clip(image_size) + test_boxes_5d.clip(image_size) + areas_4d = test_boxes_4d.area() + areas_5d = test_boxes_5d.area() + self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) + + def test_clip_area_arbitrary_angle(self): + num_boxes = 100 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) + clip_angle_threshold = random.uniform(0, 180) + + image_size = (500, 600) + test_boxes_5d = RotatedBoxes(boxes_5d) + # Before clip + areas_before = test_boxes_5d.area() + # After clip + test_boxes_5d.clip(image_size, clip_angle_threshold) + areas_diff = test_boxes_5d.area() - areas_before + + # the areas should only decrease after clipping + self.assertTrue(torch.all(areas_diff <= 0)) + # whenever the box is clipped (thus the area shrinks), + # the angle for the box must be within the clip_angle_threshold + # Note that the clip function will normalize the angle range + # to be within (-180, 180] + self.assertTrue( + torch.all(torch.abs(boxes_5d[:, 4][torch.where(areas_diff < 0)]) < clip_angle_threshold) + ) + + def test_normalize_angles(self): + # torch.manual_seed(0) + for _ in range(50): + num_boxes = 100 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) + rotated_boxes = RotatedBoxes(boxes_5d) + normalized_boxes = rotated_boxes.clone() + normalized_boxes.normalize_angles() + self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] >= -180)) + self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] < 180)) + # x, y, w, h should not change + self.assertTrue(torch.allclose(boxes_5d[:, :4], normalized_boxes.tensor[:, :4])) + # the cos/sin values of the angles should stay the same + + self.assertTrue( + torch.allclose( + torch.cos(boxes_5d[:, 4] * math.pi / 180), + torch.cos(normalized_boxes.tensor[:, 4] * math.pi / 180), + atol=1e-5, + ) + ) + + self.assertTrue( + torch.allclose( + torch.sin(boxes_5d[:, 4] * math.pi / 180), + torch.sin(normalized_boxes.tensor[:, 4] * math.pi / 180), + atol=1e-5, + ) + ) + + def test_pairwise_iou_0_degree(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor( + [[0.5, 0.5, 1.0, 1.0, 0.0], [0.5, 0.5, 1.0, 1.0, 0.0]], + dtype=torch.float32, + device=device, + ) + boxes2 = torch.tensor( + [ + [0.5, 0.5, 1.0, 1.0, 
0.0], + [0.25, 0.5, 0.5, 1.0, 0.0], + [0.5, 0.25, 1.0, 0.5, 0.0], + [0.25, 0.25, 0.5, 0.5, 0.0], + [0.75, 0.75, 0.5, 0.5, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + ], + dtype=torch.float32, + device=device, + ) + expected_ious = torch.tensor( + [ + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + ], + dtype=torch.float32, + device=device, + ) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_45_degrees(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor( + [ + [1, 1, math.sqrt(2), math.sqrt(2), 45], + [1, 1, 2 * math.sqrt(2), 2 * math.sqrt(2), -45], + ], + dtype=torch.float32, + device=device, + ) + boxes2 = torch.tensor([[1, 1, 2, 2, 0]], dtype=torch.float32, device=device) + expected_ious = torch.tensor([[0.5], [0.5]], dtype=torch.float32, device=device) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_orthogonal(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor([[5, 5, 10, 6, 55]], dtype=torch.float32, device=device) + boxes2 = torch.tensor([[5, 5, 10, 6, -35]], dtype=torch.float32, device=device) + iou = (6.0 * 6.0) / (6.0 * 6.0 + 4.0 * 6.0 + 4.0 * 6.0) + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_large_close_boxes(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + boxes1 = torch.tensor( + [[299.500000, 417.370422, 600.000000, 364.259186, 27.1828]], + dtype=torch.float32, + device=device, + ) + boxes2 = torch.tensor( + [[299.500000, 417.370422, 600.000000, 364.259155, 27.1828]], + dtype=torch.float32, + device=device, + ) + iou = 364.259155 / 364.259186 + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_many_boxes(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + num_boxes1 = 100 + num_boxes2 = 200 + boxes1 = torch.stack( + [ + torch.tensor( + [5 + 20 * i, 5 + 20 * i, 10, 10, 0], + dtype=torch.float32, + device=device, + ) + for i in range(num_boxes1) + ] + ) + boxes2 = torch.stack( + [ + torch.tensor( + [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], + dtype=torch.float32, + device=device, + ) + for i in range(num_boxes2) + ] + ) + expected_ious = torch.zeros(num_boxes1, num_boxes2, dtype=torch.float32, device=device) + for i in range(min(num_boxes1, num_boxes2)): + expected_ious[i][i] = (1 + 9 * i / num_boxes2) / 10.0 + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_issue1207_simplified(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + # Simplified test case of D2-issue-1207 + boxes1 = torch.tensor([[3, 3, 8, 2, -45.0]], device=device) + boxes2 = torch.tensor([[6, 0, 8, 2, -45.0]], device=device) + iou = 0.0 + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, 
expected_ious)) + + def test_pairwise_iou_issue1207(self): + for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): + # The original test case in D2-issue-1207 + boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device) + boxes2 = torch.tensor([[190.0, 127.0, 80.0, 21.0, -46.0]], device=device) + + iou = 0.0 + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_empty_cat(self): + x = RotatedBoxes.cat([]) + self.assertTrue(x.tensor.shape, (0, 5)) + + @unittest.skipIf(TORCH_VERSION < (1, 8), "Insufficient pytorch version") + def test_scriptability(self): + def func(x): + boxes = RotatedBoxes(x) + test = boxes.to(torch.device("cpu")).tensor + return boxes.area(), test + + f = torch.jit.script(func) + f = reload_script_model(f) + f(torch.rand((3, 5))) + + data = torch.rand((3, 5)) + + def func_cat(x: torch.Tensor): + boxes1 = RotatedBoxes(x) + boxes2 = RotatedBoxes(x) + # this is not supported by torchscript for now. + # boxes3 = RotatedBoxes.cat([boxes1, boxes2]) + boxes3 = boxes1.cat([boxes1, boxes2]) + return boxes3 + + f = torch.jit.script(func_cat) + script_box = f(data) + self.assertTrue(torch.equal(torch.cat([data, data]), script_box.tensor)) + + +def benchmark_rotated_iou(): + num_boxes1 = 200 + num_boxes2 = 500 + boxes1 = torch.stack( + [ + torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32) + for i in range(num_boxes1) + ] + ) + boxes2 = torch.stack( + [ + torch.tensor( + [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], + dtype=torch.float32, + ) + for i in range(num_boxes2) + ] + ) + + def func(dev, n=1): + b1 = boxes1.to(device=dev) + b2 = boxes2.to(device=dev) + + def bench(): + for _ in range(n): + pairwise_iou_rotated(b1, b2) + if dev.type == "cuda": + torch.cuda.synchronize() + + return bench + + # only run it once per timed loop, since it's slow + args = [{"dev": torch.device("cpu"), "n": 1}] + if torch.cuda.is_available(): + args.append({"dev": torch.device("cuda"), "n": 10}) + + benchmark(func, "rotated_iou", args, warmup_iters=3) + + +if __name__ == "__main__": + unittest.main() + benchmark_rotated_iou() diff --git a/src/sts/tests/test_checkpoint.py b/src/sts/tests/test_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..ab0bfbd590e0c37e377cc93bf99c7c005fc4bdd6 --- /dev/null +++ b/src/sts/tests/test_checkpoint.py @@ -0,0 +1,49 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
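+# Tests for align_and_update_state_dicts: a checkpoint whose keys only partially match the model is aligned and loaded, with and without DataParallel wrapping.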
+import unittest +from collections import OrderedDict +import torch +from torch import nn + +from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts +from detectron2.utils.logger import setup_logger + + +class TestCheckpointer(unittest.TestCase): + def setUp(self): + setup_logger() + + def create_complex_model(self): + m = nn.Module() + m.block1 = nn.Module() + m.block1.layer1 = nn.Linear(2, 3) + m.layer2 = nn.Linear(3, 2) + m.res = nn.Module() + m.res.layer2 = nn.Linear(3, 2) + + state_dict = OrderedDict() + state_dict["layer1.weight"] = torch.rand(3, 2) + state_dict["layer1.bias"] = torch.rand(3) + state_dict["layer2.weight"] = torch.rand(2, 3) + state_dict["layer2.bias"] = torch.rand(2) + state_dict["res.layer2.weight"] = torch.rand(2, 3) + state_dict["res.layer2.bias"] = torch.rand(2) + return m, state_dict + + def test_complex_model_loaded(self): + for add_data_parallel in [False, True]: + model, state_dict = self.create_complex_model() + if add_data_parallel: + model = nn.DataParallel(model) + model_sd = model.state_dict() + + sd_to_load = align_and_update_state_dicts(model_sd, state_dict) + model.load_state_dict(sd_to_load) + for loaded, stored in zip(model_sd.values(), state_dict.values()): + # different tensor references + self.assertFalse(id(loaded) == id(stored)) + # same content + self.assertTrue(loaded.to(stored).equal(stored)) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/test_engine.py b/src/sts/tests/test_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..9ca25d7304b8c02488b4521dd8145b414ffc5ce5 --- /dev/null +++ b/src/sts/tests/test_engine.py @@ -0,0 +1,95 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import json +import os +import tempfile +import time +import unittest +import torch +from torch import nn + +from detectron2.config import configurable, get_cfg +from detectron2.engine import DefaultTrainer, SimpleTrainer, hooks +from detectron2.modeling.meta_arch import META_ARCH_REGISTRY +from detectron2.utils.events import CommonMetricPrinter, JSONWriter + + +@META_ARCH_REGISTRY.register() +class _SimpleModel(nn.Module): + @configurable + def __init__(self, sleep_sec=0): + super().__init__() + self.mod = nn.Linear(10, 20) + self.sleep_sec = sleep_sec + + @classmethod + def from_config(cls, cfg): + return {} + + def forward(self, x): + if self.sleep_sec > 0: + time.sleep(self.sleep_sec) + return {"loss": x.sum() + sum([x.mean() for x in self.parameters()])} + + +class TestTrainer(unittest.TestCase): + def _data_loader(self, device): + device = torch.device(device) + while True: + yield torch.rand(3, 3).to(device) + + def test_simple_trainer(self, device="cpu"): + model = _SimpleModel().to(device=device) + trainer = SimpleTrainer( + model, self._data_loader(device), torch.optim.SGD(model.parameters(), 0.1) + ) + trainer.train(0, 10) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_simple_trainer_cuda(self): + self.test_simple_trainer(device="cuda") + + def test_writer_hooks(self): + model = _SimpleModel(sleep_sec=0.1) + trainer = SimpleTrainer( + model, self._data_loader("cpu"), torch.optim.SGD(model.parameters(), 0.1) + ) + + max_iter = 50 + + with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: + json_file = os.path.join(d, "metrics.json") + writers = [CommonMetricPrinter(max_iter), JSONWriter(json_file)] + + trainer.register_hooks( + [hooks.EvalHook(0, lambda: {"metric": 100}), hooks.PeriodicWriter(writers)] + ) + with 
self.assertLogs(writers[0].logger) as logs: + trainer.train(0, max_iter) + + with open(json_file, "r") as f: + data = [json.loads(line.strip()) for line in f] + self.assertEqual([x["iteration"] for x in data], [19, 39, 49, 50]) + # the eval metric is in the last line with iter 50 + self.assertIn("metric", data[-1], "Eval metric must be in last line of JSON!") + + # test logged messages from CommonMetricPrinter + self.assertEqual(len(logs.output), 3) + for log, iter in zip(logs.output, [19, 39, 49]): + self.assertIn(f"iter: {iter}", log) + + self.assertIn("eta: 0:00:00", logs.output[-1], "Last ETA must be 0!") + + @unittest.skipIf(os.environ.get("CI"), "Require COCO data.") + def test_default_trainer(self): + cfg = get_cfg() + cfg.MODEL.META_ARCHITECTURE = "_SimpleModel" + cfg.DATASETS.TRAIN = ("coco_2017_val_100",) + with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: + cfg.OUTPUT_DIR = d + trainer = DefaultTrainer(cfg) + + # test property + self.assertIs(trainer.model, trainer._trainer.model) + trainer.model = _SimpleModel() + self.assertIs(trainer.model, trainer._trainer.model) diff --git a/src/sts/tests/test_events.py b/src/sts/tests/test_events.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b03e4d1a703a417a83c2805be1ca15a4e458ed --- /dev/null +++ b/src/sts/tests/test_events.py @@ -0,0 +1,64 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import json +import os +import tempfile +import unittest + +from detectron2.utils.events import CommonMetricPrinter, EventStorage, JSONWriter + + +class TestEventWriter(unittest.TestCase): + def testScalar(self): + with tempfile.TemporaryDirectory( + prefix="detectron2_tests" + ) as dir, EventStorage() as storage: + json_file = os.path.join(dir, "test.json") + writer = JSONWriter(json_file) + for k in range(60): + storage.put_scalar("key", k, smoothing_hint=False) + if (k + 1) % 20 == 0: + writer.write() + storage.step() + writer.close() + with open(json_file) as f: + data = [json.loads(l) for l in f] + self.assertTrue([int(k["key"]) for k in data] == [19, 39, 59]) + + def testScalarMismatchedPeriod(self): + with tempfile.TemporaryDirectory( + prefix="detectron2_tests" + ) as dir, EventStorage() as storage: + json_file = os.path.join(dir, "test.json") + + writer = JSONWriter(json_file) + for k in range(60): + if k % 17 == 0: # write in a differnt period + storage.put_scalar("key2", k, smoothing_hint=False) + storage.put_scalar("key", k, smoothing_hint=False) + if (k + 1) % 20 == 0: + writer.write() + storage.step() + writer.close() + with open(json_file) as f: + data = [json.loads(l) for l in f] + self.assertTrue([int(k.get("key2", 0)) for k in data] == [17, 0, 34, 0, 51, 0]) + self.assertTrue([int(k.get("key", 0)) for k in data] == [0, 19, 0, 39, 0, 59]) + self.assertTrue([int(k["iteration"]) for k in data] == [17, 19, 34, 39, 51, 59]) + + def testPrintETA(self): + with EventStorage() as s: + p1 = CommonMetricPrinter(10) + p2 = CommonMetricPrinter() + + s.put_scalar("time", 1.0) + s.step() + s.put_scalar("time", 1.0) + s.step() + + with self.assertLogs("detectron2.utils.events") as logs: + p1.write() + self.assertIn("eta", logs.output[0]) + + with self.assertLogs("detectron2.utils.events") as logs: + p2.write() + self.assertNotIn("eta", logs.output[0]) diff --git a/src/sts/tests/test_export_caffe2.py b/src/sts/tests/test_export_caffe2.py new file mode 100644 index 0000000000000000000000000000000000000000..70baf4b704c1195ddbbf42d35ee488be4fd3611c --- /dev/null +++ b/src/sts/tests/test_export_caffe2.py @@ 
-0,0 +1,55 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# -*- coding: utf-8 -*- + +import copy +import os +import tempfile +import unittest +import torch + +from detectron2 import model_zoo +from detectron2.utils.logger import setup_logger +from detectron2.utils.testing import get_sample_coco_image + + +@unittest.skipIf(os.environ.get("CI"), "Require COCO data and model zoo.") +class TestCaffe2Export(unittest.TestCase): + def setUp(self): + setup_logger() + + def _test_model(self, config_path, device="cpu"): + # requires extra dependencies + from detectron2.export import Caffe2Model, add_export_config, Caffe2Tracer + + cfg = model_zoo.get_config(config_path) + add_export_config(cfg) + cfg.MODEL.DEVICE = device + model = model_zoo.get(config_path, trained=True, device=device) + + inputs = [{"image": get_sample_coco_image()}] + tracer = Caffe2Tracer(cfg, model, copy.deepcopy(inputs)) + + c2_model = tracer.export_caffe2() + + with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d: + c2_model.save_protobuf(d) + c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs)) + + c2_model = Caffe2Model.load_protobuf(d) + c2_model(inputs)[0]["instances"] + + ts_model = tracer.export_torchscript() + ts_model.save(os.path.join(d, "model.ts")) + + def testMaskRCNN(self): + self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def testMaskRCNNGPU(self): + self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda") + + def testRetinaNet(self): + self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") + + def testPanopticFPN(self): + self._test_model("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") diff --git a/src/sts/tests/test_export_torchscript.py b/src/sts/tests/test_export_torchscript.py new file mode 100644 index 0000000000000000000000000000000000000000..6829609ac617f3d5fefee28b64f7cf367875c3e1 --- /dev/null +++ b/src/sts/tests/test_export_torchscript.py @@ -0,0 +1,213 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +import json +import os +import tempfile +import unittest +import torch +from torch import Tensor, nn + +from detectron2 import model_zoo +from detectron2.config import get_cfg +from detectron2.config.instantiate import dump_dataclass, instantiate +from detectron2.export import dump_torchscript_IR, scripting_with_instances +from detectron2.export.flatten import TracingAdapter, flatten_to_tuple +from detectron2.export.torchscript_patch import patch_builtin_len +from detectron2.layers import ShapeSpec +from detectron2.modeling import build_backbone +from detectron2.modeling.postprocessing import detector_postprocess +from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead +from detectron2.structures import Boxes, Instances +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.testing import ( + assert_instances_allclose, + convert_scripted_instances, + get_sample_coco_image, + random_boxes, +) + + +""" +https://detectron2.readthedocs.io/tutorials/deployment.html +contains some explanations of this file. 
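+The tests below exercise both deployment paths: scripting via scripting_with_instances
+and tracing via TracingAdapter. A minimal sketch of the scripting path, mirroring
+_test_retinanet_model below:
+    fields = {"pred_boxes": Boxes, "scores": Tensor, "pred_classes": Tensor}
+    script_model = scripting_with_instances(model, fields)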
+""" + + +@unittest.skipIf(os.environ.get("CI") or TORCH_VERSION < (1, 8), "Insufficient Pytorch version") +class TestScripting(unittest.TestCase): + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def testMaskRCNN(self): + self._test_rcnn_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def testRetinaNet(self): + self._test_retinanet_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") + + def _test_rcnn_model(self, config_path): + model = model_zoo.get(config_path, trained=True) + model.eval() + + fields = { + "proposal_boxes": Boxes, + "objectness_logits": Tensor, + "pred_boxes": Boxes, + "scores": Tensor, + "pred_classes": Tensor, + "pred_masks": Tensor, + } + script_model = scripting_with_instances(model, fields) + + inputs = [{"image": get_sample_coco_image()}] + with torch.no_grad(): + instance = model.inference(inputs, do_postprocess=False)[0] + scripted_instance = script_model.inference(inputs, do_postprocess=False)[0] + assert_instances_allclose(instance, scripted_instance) + + def _test_retinanet_model(self, config_path): + model = model_zoo.get(config_path, trained=True) + model.eval() + + fields = { + "pred_boxes": Boxes, + "scores": Tensor, + "pred_classes": Tensor, + } + script_model = scripting_with_instances(model, fields) + + img = get_sample_coco_image() + inputs = [{"image": img}] + with torch.no_grad(): + instance = model(inputs)[0]["instances"] + scripted_instance = convert_scripted_instances(script_model(inputs)[0]) + scripted_instance = detector_postprocess(scripted_instance, img.shape[1], img.shape[2]) + assert_instances_allclose(instance, scripted_instance) + # Note that the model currently cannot be saved and loaded into a new process: + # https://github.com/pytorch/pytorch/issues/46944 + + +@unittest.skipIf(os.environ.get("CI") or TORCH_VERSION < (1, 8), "Insufficient Pytorch version") +class TestTracing(unittest.TestCase): + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def testMaskRCNN(self): + def inference_func(model, image): + inputs = [{"image": image}] + return model.inference(inputs, do_postprocess=False)[0] + + self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", inference_func) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def testRetinaNet(self): + def inference_func(model, image): + return model.forward([{"image": image}])[0]["instances"] + + self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml", inference_func) + + def _test_model(self, config_path, inference_func): + model = model_zoo.get(config_path, trained=True) + image = get_sample_coco_image() + + wrapper = TracingAdapter(model, image, inference_func) + wrapper.eval() + with torch.no_grad(): + small_image = nn.functional.interpolate(image, scale_factor=0.5) + # trace with a different image, and the trace must still work + traced_model = torch.jit.trace(wrapper, (small_image,)) + + output = inference_func(model, image) + traced_output = wrapper.outputs_schema(traced_model(image)) + assert_instances_allclose(output, traced_output, size_as_tensor=True) + + def testKeypointHead(self): + class M(nn.Module): + def __init__(self): + super().__init__() + self.model = KRCNNConvDeconvUpsampleHead( + ShapeSpec(channels=4, height=14, width=14), num_keypoints=17, conv_dims=(4,) + ) + + def forward(self, x, predbox1, predbox2): + inst = [ + Instances((100, 100), pred_boxes=Boxes(predbox1)), + Instances((100, 100), 
pred_boxes=Boxes(predbox2)), + ] + ret = self.model(x, inst) + return tuple(x.pred_keypoints for x in ret) + + model = M() + model.eval() + + def gen_input(num1, num2): + feat = torch.randn((num1 + num2, 4, 14, 14)) + box1 = random_boxes(num1) + box2 = random_boxes(num2) + return feat, box1, box2 + + with torch.no_grad(), patch_builtin_len(): + trace = torch.jit.trace(model, gen_input(15, 15), check_trace=False) + + inputs = gen_input(12, 10) + trace_outputs = trace(*inputs) + true_outputs = model(*inputs) + for trace_output, true_output in zip(trace_outputs, true_outputs): + self.assertTrue(torch.allclose(trace_output, true_output)) + + +class TestTorchscriptUtils(unittest.TestCase): + # TODO: add test to dump scripting + def test_dump_IR_tracing(self): + cfg = get_cfg() + cfg.MODEL.RESNETS.DEPTH = 18 + cfg.MODEL.RESNETS.RES2_OUT_CHANNELS = 64 + + class Mod(nn.Module): + def forward(self, x): + return tuple(self.m(x).values()) + + model = Mod() + model.m = build_backbone(cfg) + model.eval() + + with torch.no_grad(): + ts_model = torch.jit.trace(model, (torch.rand(2, 3, 224, 224),)) + + with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: + dump_torchscript_IR(ts_model, d) + # check that the files are created + for name in ["model_ts_code", "model_ts_IR", "model_ts_IR_inlined", "model"]: + fname = os.path.join(d, name + ".txt") + self.assertTrue(os.stat(fname).st_size > 0, fname) + + def test_flatten_basic(self): + obj = [3, ([5, 6], {"name": [7, 9], "name2": 3})] + res, schema = flatten_to_tuple(obj) + self.assertEqual(res, (3, 5, 6, 7, 9, 3)) + new_obj = schema(res) + self.assertEqual(new_obj, obj) + + _, new_schema = flatten_to_tuple(new_obj) + self.assertEqual(schema, new_schema) # test __eq__ + self._check_schema(schema) + + def _check_schema(self, schema): + dumped_schema = dump_dataclass(schema) + # Check that the schema is json-serializable + # Although in reality you might want to use yaml because it often has many levels + json.dumps(dumped_schema) + + # Check that the schema can be deserialized + new_schema = instantiate(dumped_schema) + self.assertEqual(schema, new_schema) + + def test_flatten_instances_boxes(self): + inst = Instances( + torch.tensor([5, 8]), pred_masks=torch.tensor([3]), pred_boxes=Boxes(torch.ones((1, 4))) + ) + obj = [3, ([5, 6], inst)] + res, schema = flatten_to_tuple(obj) + self.assertEqual(res[:3], (3, 5, 6)) + for r, expected in zip(res[3:], (inst.pred_boxes.tensor, inst.pred_masks, inst.image_size)): + self.assertIs(r, expected) + new_obj = schema(res) + assert_instances_allclose(new_obj[1][1], inst, rtol=0.0, size_as_tensor=True) + + self._check_schema(schema) diff --git a/src/sts/tests/test_instantiate_config.py b/src/sts/tests/test_instantiate_config.py new file mode 100644 index 0000000000000000000000000000000000000000..2c386d11d9001f32957d6f3306e606fa96245fc9 --- /dev/null +++ b/src/sts/tests/test_instantiate_config.py @@ -0,0 +1,88 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
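+# Tests for LazyCall / instantiate: nested lazy configs, "${..}" interpolation, recursive instantiation of lists, and namedtuple round-trips through OmegaConf/yaml.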
+ +import os +import tempfile +import unittest +import yaml +from omegaconf import OmegaConf +from omegaconf import __version__ as oc_version + +from detectron2.config.instantiate import LazyCall as L +from detectron2.config.instantiate import instantiate +from detectron2.layers import ShapeSpec + +OC_VERSION = tuple(int(x) for x in oc_version.split(".")[:2]) + + +class TestClass: + def __init__(self, int_arg, list_arg=None, dict_arg=None, extra_arg=None): + self.int_arg = int_arg + self.list_arg = list_arg + self.dict_arg = dict_arg + self.extra_arg = extra_arg + + def __call__(self, call_arg): + return call_arg + self.int_arg + + +@unittest.skipIf(OC_VERSION < (2, 1), "omegaconf version too old") +class TestConstruction(unittest.TestCase): + def test_basic_construct(self): + objconf = L(TestClass)( + int_arg=3, + list_arg=[10], + dict_arg={}, + extra_arg=L(TestClass)(int_arg=4, list_arg="${..list_arg}"), + ) + + obj = instantiate(objconf) + self.assertIsInstance(obj, TestClass) + self.assertEqual(obj.int_arg, 3) + self.assertEqual(obj.extra_arg.int_arg, 4) + self.assertEqual(obj.extra_arg.list_arg, obj.list_arg) + + objconf.extra_arg.list_arg = [5] + obj = instantiate(objconf) + self.assertIsInstance(obj, TestClass) + self.assertEqual(obj.extra_arg.list_arg, [5]) + + def test_instantiate_other_obj(self): + # do nothing for other obj + self.assertEqual(instantiate(5), 5) + x = [3, 4, 5] + self.assertEqual(instantiate(x), x) + x = TestClass(1) + self.assertIs(instantiate(x), x) + x = {"xx": "yy"} + self.assertIs(instantiate(x), x) + + def test_instantiate_lazy_target(self): + # _target_ is result of instantiate + objconf = L(L(len)(int_arg=3))(call_arg=4) + objconf._target_._target_ = TestClass + self.assertEqual(instantiate(objconf), 7) + + def test_instantiate_lst(self): + lst = [1, 2, L(TestClass)(int_arg=1)] + x = L(TestClass)(int_arg=lst) # list as an argument should be recursively instantiated + x = instantiate(x).int_arg + self.assertEqual(x[:2], [1, 2]) + self.assertIsInstance(x[2], TestClass) + self.assertEqual(x[2].int_arg, 1) + + def test_instantiate_namedtuple(self): + x = L(TestClass)(int_arg=ShapeSpec(channels=1, width=3)) + # test serialization + with tempfile.TemporaryDirectory() as d: + fname = os.path.join(d, "d2_test.yaml") + OmegaConf.save(x, fname) + with open(fname) as f: + x = yaml.unsafe_load(f) + + x = instantiate(x) + self.assertIsInstance(x.int_arg, ShapeSpec) + self.assertEqual(x.int_arg.channels, 1) + + def test_bad_lazycall(self): + with self.assertRaises(Exception): + L(3) diff --git a/src/sts/tests/test_model_analysis.py b/src/sts/tests/test_model_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..32982b4026baac7b9c7a96f7bedf3434768c4712 --- /dev/null +++ b/src/sts/tests/test_model_analysis.py @@ -0,0 +1,44 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
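+# Sanity checks for flop_count_operators and parameter_count on RetinaNet and Faster R-CNN models built without pretrained weights.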
+ + +import unittest +import torch + +from detectron2.utils.analysis import flop_count_operators, parameter_count +from detectron2.utils.testing import get_model_no_weights + + +class RetinaNetTest(unittest.TestCase): + def setUp(self): + self.model = get_model_no_weights("COCO-Detection/retinanet_R_50_FPN_1x.yaml") + + def test_flop(self): + # RetinaNet supports flop-counting with random inputs + inputs = [{"image": torch.rand(3, 800, 800), "test_unused": "abcd"}] + res = flop_count_operators(self.model, inputs) + self.assertTrue(int(res["conv"]), 146) # 146B flops + + def test_param_count(self): + res = parameter_count(self.model) + self.assertTrue(res[""], 37915572) + self.assertTrue(res["backbone"], 31452352) + + +class FasterRCNNTest(unittest.TestCase): + def setUp(self): + self.model = get_model_no_weights("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml") + + def test_flop(self): + # Faster R-CNN supports flop-counting with random inputs + inputs = [{"image": torch.rand(3, 800, 800)}] + res = flop_count_operators(self.model, inputs) + + # This only checks flops for backbone & proposal generator + # Flops for box head is not conv, and depends on #proposals, which is + # almost 0 for random inputs. + self.assertTrue(int(res["conv"]), 117) + + def test_param_count(self): + res = parameter_count(self.model) + self.assertTrue(res[""], 41699936) + self.assertTrue(res["backbone"], 26799296) diff --git a/src/sts/tests/test_model_zoo.py b/src/sts/tests/test_model_zoo.py new file mode 100644 index 0000000000000000000000000000000000000000..c56716e4693c4a5ddf6c116f10369383bd1cd12d --- /dev/null +++ b/src/sts/tests/test_model_zoo.py @@ -0,0 +1,29 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import logging +import unittest + +from detectron2 import model_zoo +from detectron2.modeling import FPN, GeneralizedRCNN + +logger = logging.getLogger(__name__) + + +class TestModelZoo(unittest.TestCase): + def test_get_returns_model(self): + model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False) + self.assertIsInstance(model, GeneralizedRCNN) + self.assertIsInstance(model.backbone, FPN) + + def test_get_invalid_model(self): + self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml") + + def test_get_url(self): + url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") + self.assertEqual( + url, + "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl", # noqa + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/test_packaging.py b/src/sts/tests/test_packaging.py new file mode 100644 index 0000000000000000000000000000000000000000..a5b1661e8f341fe66a6e02c59fe172bce445782b --- /dev/null +++ b/src/sts/tests/test_packaging.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
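+# Smoke tests: detectron2.projects imports resolve and collect_env_info() runs.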
+import unittest + +from detectron2.utils.collect_env import collect_env_info + + +class TestProjects(unittest.TestCase): + def test_import(self): + from detectron2.projects import point_rend + + _ = point_rend.add_pointrend_config + + import detectron2.projects.deeplab as deeplab + + _ = deeplab.add_deeplab_config + + # import detectron2.projects.panoptic_deeplab as panoptic_deeplab + + # _ = panoptic_deeplab.add_panoptic_deeplab_config + + +class TestCollectEnv(unittest.TestCase): + def test(self): + _ = collect_env_info() diff --git a/src/sts/tests/test_registry.py b/src/sts/tests/test_registry.py new file mode 100644 index 0000000000000000000000000000000000000000..93027d40452d60f9a4d8ba6459772d3c46b66448 --- /dev/null +++ b/src/sts/tests/test_registry.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import unittest +import torch + +from detectron2.modeling.meta_arch import GeneralizedRCNN +from detectron2.utils.registry import _convert_target_to_string, locate + + +class A: + class B: + pass + + +class TestLocate(unittest.TestCase): + def _test_obj(self, obj): + name = _convert_target_to_string(obj) + newobj = locate(name) + self.assertIs(obj, newobj) + + def test_basic(self): + self._test_obj(GeneralizedRCNN) + + def test_inside_class(self): + # requires using __qualname__ instead of __name__ + self._test_obj(A.B) + + def test_builtin(self): + self._test_obj(len) + self._test_obj(dict) + + def test_pytorch_optim(self): + # pydoc.locate does not work for it + self._test_obj(torch.optim.SGD) + + def test_failure(self): + with self.assertRaises(ImportError): + locate("asdf") diff --git a/src/sts/tests/test_scheduler.py b/src/sts/tests/test_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..6cccb03f74b594c06add44a134b526e41c2974f0 --- /dev/null +++ b/src/sts/tests/test_scheduler.py @@ -0,0 +1,68 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
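+# Tests for LRMultiplier with WarmupParamScheduler: warmup + multi-step and warmup + cosine schedules are checked against expected per-iteration learning rates.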
+ +import math +import numpy as np +from unittest import TestCase +import torch +from fvcore.common.param_scheduler import CosineParamScheduler, MultiStepParamScheduler +from torch import nn + +from detectron2.solver import LRMultiplier, WarmupParamScheduler + + +class TestScheduler(TestCase): + def test_warmup_multistep(self): + p = nn.Parameter(torch.zeros(0)) + opt = torch.optim.SGD([p], lr=5) + + multiplier = WarmupParamScheduler( + MultiStepParamScheduler( + [1, 0.1, 0.01, 0.001], + milestones=[10, 15, 20], + num_updates=30, + ), + 0.001, + 5 / 30, + ) + sched = LRMultiplier(opt, multiplier, 30) + # This is an equivalent of: + # sched = WarmupMultiStepLR( + # opt, milestones=[10, 15, 20], gamma=0.1, warmup_factor=0.001, warmup_iters=5) + + p.sum().backward() + opt.step() + + lrs = [0.005] + for _ in range(30): + sched.step() + lrs.append(opt.param_groups[0]["lr"]) + self.assertTrue(np.allclose(lrs[:5], [0.005, 1.004, 2.003, 3.002, 4.001])) + self.assertTrue(np.allclose(lrs[5:10], 5.0)) + self.assertTrue(np.allclose(lrs[10:15], 0.5)) + self.assertTrue(np.allclose(lrs[15:20], 0.05)) + self.assertTrue(np.allclose(lrs[20:], 0.005)) + + def test_warmup_cosine(self): + p = nn.Parameter(torch.zeros(0)) + opt = torch.optim.SGD([p], lr=5) + multiplier = WarmupParamScheduler( + CosineParamScheduler(1, 0), + 0.001, + 5 / 30, + ) + sched = LRMultiplier(opt, multiplier, 30) + + p.sum().backward() + opt.step() + self.assertEqual(opt.param_groups[0]["lr"], 0.005) + lrs = [0.005] + + for _ in range(30): + sched.step() + lrs.append(opt.param_groups[0]["lr"]) + for idx, lr in enumerate(lrs): + expected_cosine = 2.5 * (1.0 + math.cos(math.pi * idx / 30)) + if idx >= 5: + self.assertAlmostEqual(lr, expected_cosine) + else: + self.assertNotAlmostEqual(lr, expected_cosine) diff --git a/src/sts/tests/test_visualizer.py b/src/sts/tests/test_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..7e53f94d0d2f2931e5cc0a0bb2e96bf1f1dff435 --- /dev/null +++ b/src/sts/tests/test_visualizer.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
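+# Tests for Visualizer: dataset dicts, instance predictions, rotated boxes, binary masks (including masks with holes), and output shape/scale handling.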
+ +import numpy as np +import os +import tempfile +import unittest +import cv2 +import torch + +from detectron2.data import MetadataCatalog +from detectron2.structures import BoxMode, Instances, RotatedBoxes +from detectron2.utils.visualizer import ColorMode, Visualizer + + +class TestVisualizer(unittest.TestCase): + def _random_data(self): + H, W = 100, 100 + N = 10 + img = np.random.rand(H, W, 3) * 255 + boxxy = np.random.rand(N, 2) * (H // 2) + boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1) + + def _rand_poly(): + return np.random.rand(3, 2).flatten() * H + + polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)] + + mask = np.zeros_like(img[:, :, 0], dtype=np.bool) + mask[:40, 10:20] = 1 + + labels = [str(i) for i in range(N)] + return img, boxes, labels, polygons, [mask] * N + + @property + def metadata(self): + return MetadataCatalog.get("coco_2017_train") + + def test_draw_dataset_dict(self): + img = np.random.rand(512, 512, 3) * 255 + dic = { + "annotations": [ + { + "bbox": [ + 368.9946492271106, + 330.891438763377, + 13.148537455410235, + 13.644708680142685, + ], + "bbox_mode": BoxMode.XYWH_ABS, + "category_id": 0, + "iscrowd": 1, + "segmentation": { + "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2", + "size": [512, 512], + }, + } + ], + "height": 512, + "image_id": 1, + "width": 512, + } + v = Visualizer(img) + v.draw_dataset_dict(dic) + + v = Visualizer(img, self.metadata) + v.draw_dataset_dict(dic) + + def test_draw_rotated_dataset_dict(self): + img = np.random.rand(512, 512, 3) * 255 + dic = { + "annotations": [ + { + "bbox": [ + 368.9946492271106, + 330.891438763377, + 13.148537455410235, + 13.644708680142685, + 45.0, + ], + "bbox_mode": BoxMode.XYWHA_ABS, + "category_id": 0, + "iscrowd": 1, + } + ], + "height": 512, + "image_id": 1, + "width": 512, + } + v = Visualizer(img, self.metadata) + v.draw_dataset_dict(dic) + + def test_overlay_instances(self): + img, boxes, labels, polygons, masks = self._random_data() + + v = Visualizer(img, self.metadata) + output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() + self.assertEqual(output.shape, img.shape) + + # Test 2x scaling + v = Visualizer(img, self.metadata, scale=2.0) + output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() + self.assertEqual(output.shape[0], img.shape[0] * 2) + + # Test overlay masks + v = Visualizer(img, self.metadata) + output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image() + self.assertEqual(output.shape, img.shape) + + def test_overlay_instances_no_boxes(self): + img, boxes, labels, polygons, _ = self._random_data() + v = Visualizer(img, self.metadata) + v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image() + + def test_draw_instance_predictions(self): + img, boxes, _, _, masks = self._random_data() + num_inst = len(boxes) + inst = Instances((img.shape[0], img.shape[1])) + inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) + inst.scores = torch.rand(num_inst) + inst.pred_boxes = torch.from_numpy(boxes) + inst.pred_masks = torch.from_numpy(np.asarray(masks)) + + v = Visualizer(img) + v.draw_instance_predictions(inst) + + v = Visualizer(img, self.metadata) + v.draw_instance_predictions(inst) + + def test_BWmode_nomask(self): + img, boxes, _, _, masks = self._random_data() + num_inst = len(boxes) + inst = Instances((img.shape[0], img.shape[1])) + inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) + inst.scores = torch.rand(num_inst) + 
inst.pred_boxes = torch.from_numpy(boxes) + + v = Visualizer(img, self.metadata, instance_mode=ColorMode.IMAGE_BW) + v.draw_instance_predictions(inst) + + def test_draw_empty_mask_predictions(self): + img, boxes, _, _, masks = self._random_data() + num_inst = len(boxes) + inst = Instances((img.shape[0], img.shape[1])) + inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) + inst.scores = torch.rand(num_inst) + inst.pred_boxes = torch.from_numpy(boxes) + inst.pred_masks = torch.from_numpy(np.zeros_like(np.asarray(masks))) + + v = Visualizer(img, self.metadata) + v.draw_instance_predictions(inst) + + def test_correct_output_shape(self): + img = np.random.rand(928, 928, 3) * 255 + v = Visualizer(img, self.metadata) + out = v.output.get_image() + self.assertEqual(out.shape, img.shape) + + def test_overlay_rotated_instances(self): + H, W = 100, 150 + img = np.random.rand(H, W, 3) * 255 + num_boxes = 50 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) + boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) + boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) + rotated_boxes = RotatedBoxes(boxes_5d) + labels = [str(i) for i in range(num_boxes)] + + v = Visualizer(img, self.metadata) + output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image() + self.assertEqual(output.shape, img.shape) + + def test_draw_no_metadata(self): + img, boxes, _, _, masks = self._random_data() + num_inst = len(boxes) + inst = Instances((img.shape[0], img.shape[1])) + inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) + inst.scores = torch.rand(num_inst) + inst.pred_boxes = torch.from_numpy(boxes) + inst.pred_masks = torch.from_numpy(np.asarray(masks)) + + v = Visualizer(img, MetadataCatalog.get("asdfasdf")) + v.draw_instance_predictions(inst) + + def test_draw_binary_mask(self): + img, boxes, _, _, masks = self._random_data() + img[:, :, 0] = 0 # remove red color + mask = masks[0] + mask_with_hole = np.zeros_like(mask).astype("uint8") + mask_with_hole = cv2.rectangle(mask_with_hole, (10, 10), (50, 50), 1, 5) + + for m in [mask, mask_with_hole]: + for save in [True, False]: + v = Visualizer(img) + o = v.draw_binary_mask(m, color="red", text="test") + if save: + with tempfile.TemporaryDirectory(prefix="detectron2_viz") as d: + path = os.path.join(d, "output.png") + o.save(path) + o = cv2.imread(path)[:, :, ::-1] + else: + o = o.get_image().astype("float32") + # red color is drawn on the image + self.assertTrue(o[:, :, 0].sum() > 0) + + def test_border_mask_with_holes(self): + H, W = 200, 200 + img = np.zeros((H, W, 3)) + img[:, :, 0] = 255.0 + v = Visualizer(img, scale=3) + + mask = np.zeros((H, W)) + mask[:, 100:150] = 1 + # create a hole, to trigger imshow + mask = cv2.rectangle(mask, (110, 110), (130, 130), 0, thickness=-1) + output = v.draw_binary_mask(mask, color="blue") + output = output.get_image()[:, :, ::-1] + + first_row = {tuple(x.tolist()) for x in output[0]} + last_row = {tuple(x.tolist()) for x in output[-1]} + # Check quantization / off-by-1 error: the first and last row must have two colors + self.assertEqual(len(last_row), 2) + self.assertEqual(len(first_row), 2) + self.assertIn((0, 0, 255), last_row) + self.assertIn((0, 0, 255), first_row) + + def test_border_polygons(self): + H, W = 200, 200 + img = np.zeros((H, W, 3)) + img[:, :, 
0] = 255.0 + v = Visualizer(img, scale=3) + mask = np.zeros((H, W)) + mask[:, 100:150] = 1 + + output = v.draw_binary_mask(mask, color="blue") + output = output.get_image()[:, :, ::-1] + + first_row = {tuple(x.tolist()) for x in output[0]} + last_row = {tuple(x.tolist()) for x in output[-1]} + # Check quantization / off-by-1 error: + # the first and last row must have >=2 colors, because the polygon + # touches both rows + self.assertGreaterEqual(len(last_row), 2) + self.assertGreaterEqual(len(first_row), 2) + self.assertIn((0, 0, 255), last_row) + self.assertIn((0, 0, 255), first_row) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/sts/tests/test_yacs_config.py b/src/sts/tests/test_yacs_config.py new file mode 100644 index 0000000000000000000000000000000000000000..d66d0649104a6f8435600d745eb1b6ca45876774 --- /dev/null +++ b/src/sts/tests/test_yacs_config.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. + + +import os +import tempfile +import unittest +import torch +from omegaconf import OmegaConf + +from detectron2 import model_zoo +from detectron2.config import configurable, downgrade_config, get_cfg, upgrade_config +from detectron2.layers import ShapeSpec +from detectron2.modeling import build_model + +_V0_CFG = """ +MODEL: + RPN_HEAD: + NAME: "TEST" +VERSION: 0 +""" + +_V1_CFG = """ +MODEL: + WEIGHT: "/path/to/weight" +""" + + +class TestConfigVersioning(unittest.TestCase): + def test_upgrade_downgrade_consistency(self): + cfg = get_cfg() + # check that custom is preserved + cfg.USER_CUSTOM = 1 + + down = downgrade_config(cfg, to_version=0) + up = upgrade_config(down) + self.assertTrue(up == cfg) + + def _merge_cfg_str(self, cfg, merge_str): + f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) + try: + f.write(merge_str) + f.close() + cfg.merge_from_file(f.name) + finally: + os.remove(f.name) + return cfg + + def test_auto_upgrade(self): + cfg = get_cfg() + latest_ver = cfg.VERSION + cfg.USER_CUSTOM = 1 + + self._merge_cfg_str(cfg, _V0_CFG) + + self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST") + self.assertEqual(cfg.VERSION, latest_ver) + + def test_guess_v1(self): + cfg = get_cfg() + latest_ver = cfg.VERSION + self._merge_cfg_str(cfg, _V1_CFG) + self.assertEqual(cfg.VERSION, latest_ver) + + +class _TestClassA(torch.nn.Module): + @configurable + def __init__(self, arg1, arg2, arg3=3): + super().__init__() + self.arg1 = arg1 + self.arg2 = arg2 + self.arg3 = arg3 + assert arg1 == 1 + assert arg2 == 2 + assert arg3 == 3 + + @classmethod + def from_config(cls, cfg): + args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} + return args + + +class _TestClassB(_TestClassA): + @configurable + def __init__(self, input_shape, arg1, arg2, arg3=3): + """ + Doc of _TestClassB + """ + assert input_shape == "shape" + super().__init__(arg1, arg2, arg3) + + @classmethod + def from_config(cls, cfg, input_shape): # test extra positional arg in from_config + args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} + args["input_shape"] = input_shape + return args + + +class _LegacySubClass(_TestClassB): + # an old subclass written in cfg style + def __init__(self, cfg, input_shape, arg4=4): + super().__init__(cfg, input_shape) + assert self.arg1 == 1 + assert self.arg2 == 2 + assert self.arg3 == 3 + + +class _NewSubClassNewInit(_TestClassB): + # test new subclass with a new __init__ + @configurable + def __init__(self, input_shape, arg4=4, **kwargs): + super().__init__(input_shape, **kwargs) + assert self.arg1 == 1 + assert self.arg2 == 2 + 
assert self.arg3 == 3 + + +class _LegacySubClassNotCfg(_TestClassB): + # an old subclass written in cfg style, but argument is not called "cfg" + def __init__(self, config, input_shape): + super().__init__(config, input_shape) + assert self.arg1 == 1 + assert self.arg2 == 2 + assert self.arg3 == 3 + + +class _TestClassC(_TestClassB): + @classmethod + def from_config(cls, cfg, input_shape, **kwargs): # test extra kwarg overwrite + args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} + args["input_shape"] = input_shape + args.update(kwargs) + return args + + +class _TestClassD(_TestClassA): + @configurable + def __init__(self, input_shape: ShapeSpec, arg1: int, arg2, arg3=3): + assert input_shape == "shape" + super().__init__(arg1, arg2, arg3) + + # _TestClassA.from_config does not have input_shape args. + # Test whether input_shape will be forwarded to __init__ + + +@configurable(from_config=lambda cfg, arg2: {"arg1": cfg.ARG1, "arg2": arg2, "arg3": cfg.ARG3}) +def _test_func(arg1, arg2=2, arg3=3, arg4=4): + return arg1, arg2, arg3, arg4 + + +class TestConfigurable(unittest.TestCase): + def testInitWithArgs(self): + _ = _TestClassA(arg1=1, arg2=2, arg3=3) + _ = _TestClassB("shape", arg1=1, arg2=2) + _ = _TestClassC("shape", arg1=1, arg2=2) + _ = _TestClassD("shape", arg1=1, arg2=2, arg3=3) + + def testPatchedAttr(self): + self.assertTrue("Doc" in _TestClassB.__init__.__doc__) + self.assertEqual(_TestClassD.__init__.__annotations__["arg1"], int) + + def testInitWithCfg(self): + cfg = get_cfg() + cfg.ARG1 = 1 + cfg.ARG2 = 2 + cfg.ARG3 = 3 + _ = _TestClassA(cfg) + _ = _TestClassB(cfg, input_shape="shape") + _ = _TestClassC(cfg, input_shape="shape") + _ = _TestClassD(cfg, input_shape="shape") + _ = _LegacySubClass(cfg, input_shape="shape") + _ = _NewSubClassNewInit(cfg, input_shape="shape") + _ = _LegacySubClassNotCfg(cfg, input_shape="shape") + with self.assertRaises(TypeError): + # disallow forwarding positional args to __init__ since it's prone to errors + _ = _TestClassD(cfg, "shape") + + # call with kwargs instead + _ = _TestClassA(cfg=cfg) + _ = _TestClassB(cfg=cfg, input_shape="shape") + _ = _TestClassC(cfg=cfg, input_shape="shape") + _ = _TestClassD(cfg=cfg, input_shape="shape") + _ = _LegacySubClass(cfg=cfg, input_shape="shape") + _ = _NewSubClassNewInit(cfg=cfg, input_shape="shape") + _ = _LegacySubClassNotCfg(config=cfg, input_shape="shape") + + def testInitWithCfgOverwrite(self): + cfg = get_cfg() + cfg.ARG1 = 1 + cfg.ARG2 = 999 # wrong config + with self.assertRaises(AssertionError): + _ = _TestClassA(cfg, arg3=3) + + # overwrite arg2 with correct config later: + _ = _TestClassA(cfg, arg2=2, arg3=3) + _ = _TestClassB(cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassC(cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassD(cfg, input_shape="shape", arg2=2, arg3=3) + + # call with kwargs cfg=cfg instead + _ = _TestClassA(cfg=cfg, arg2=2, arg3=3) + _ = _TestClassB(cfg=cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassC(cfg=cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassD(cfg=cfg, input_shape="shape", arg2=2, arg3=3) + + def testInitWithCfgWrongArgs(self): + cfg = get_cfg() + cfg.ARG1 = 1 + cfg.ARG2 = 2 + with self.assertRaises(TypeError): + _ = _TestClassB(cfg, "shape", not_exist=1) + with self.assertRaises(TypeError): + _ = _TestClassC(cfg, "shape", not_exist=1) + with self.assertRaises(TypeError): + _ = _TestClassD(cfg, "shape", not_exist=1) + + def testBadClass(self): + class _BadClass1: + @configurable + def __init__(self, a=1, b=2): + pass + + class 
_BadClass2: + @configurable + def __init__(self, a=1, b=2): + pass + + def from_config(self, cfg): # noqa + pass + + class _BadClass3: + @configurable + def __init__(self, a=1, b=2): + pass + + # bad name: must be cfg + @classmethod + def from_config(cls, config): # noqa + pass + + with self.assertRaises(AttributeError): + _ = _BadClass1(a=1) + + with self.assertRaises(TypeError): + _ = _BadClass2(a=1) + + with self.assertRaises(TypeError): + _ = _BadClass3(get_cfg()) + + def testFuncWithCfg(self): + cfg = get_cfg() + cfg.ARG1 = 10 + cfg.ARG3 = 30 + + self.assertEqual(_test_func(1), (1, 2, 3, 4)) + with self.assertRaises(TypeError): + _test_func(cfg) + self.assertEqual(_test_func(cfg, arg2=2), (10, 2, 30, 4)) + self.assertEqual(_test_func(cfg, arg1=100, arg2=20), (100, 20, 30, 4)) + self.assertEqual(_test_func(cfg, arg1=100, arg2=20, arg4=40), (100, 20, 30, 40)) + + def testOmegaConf(self): + cfg = model_zoo.get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml") + cfg = OmegaConf.create(cfg.dump()) + if not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + # test that a model can be built with omegaconf config as well + build_model(cfg) diff --git a/src/sts/tools/README.md b/src/sts/tools/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0b40d5319c0838fdaa22bc6a10ef0d88bc6578ed --- /dev/null +++ b/src/sts/tools/README.md @@ -0,0 +1,49 @@ + +This directory contains a few example scripts that demonstrate features of detectron2. + + +* `train_net.py` + +An example training script that's made to train builtin models of detectron2. + +For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md). + +* `plain_train_net.py` + +Similar to `train_net.py`, but implements a training loop instead of using `Trainer`. +This script includes fewer features but it may be more friendly to hackers. + +* `benchmark.py` + +Benchmark the training speed, inference speed or data loading speed of a given config. + +Usage: +``` +python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags] +``` + +* `analyze_model.py` + +Analyze FLOPs, parameters, activations of a detectron2 model. See its `--help` for usage. + +* `visualize_json_results.py` + +Visualize the json instance detection/segmentation results dumped by `COCOEvalutor` or `LVISEvaluator` + +Usage: +``` +python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val +``` +If not using a builtin dataset, you'll need your own script or modify this script. + +* `visualize_data.py` + +Visualize ground truth raw annotations or training data (after preprocessing/augmentations). + +Usage: +``` +python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show] +``` + +NOTE: the script does not stop by itself when using `--source dataloader` because a training +dataloader is usually infinite. diff --git a/src/sts/tools/analyze_model.py b/src/sts/tools/analyze_model.py new file mode 100644 index 0000000000000000000000000000000000000000..0d825814c94561b61e312319628070399fa3cef4 --- /dev/null +++ b/src/sts/tools/analyze_model.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
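+# Command-line tool that reports flops, activations, parameter counts, or the model structure for a given config; see the epilog below for example invocations.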
+ +import logging +import numpy as np +from collections import Counter +import tqdm +from fvcore.nn import flop_count_table # can also try flop_count_str + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import build_detection_test_loader +from detectron2.engine import default_argument_parser +from detectron2.modeling import build_model +from detectron2.utils.analysis import ( + FlopCountAnalysis, + activation_count_operators, + parameter_count_table, +) +from detectron2.utils.logger import setup_logger + +logger = logging.getLogger("detectron2") + + +def setup(args): + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.DATALOADER.NUM_WORKERS = 0 + cfg.merge_from_list(args.opts) + cfg.freeze() + setup_logger(name="fvcore") + setup_logger() + return cfg + + +def do_flop(cfg): + data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + model.eval() + + counts = Counter() + total_flops = [] + for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa + flops = FlopCountAnalysis(model, data) + if idx > 0: + flops.unsupported_ops_warnings(False).uncalled_modules_warnings(False) + counts += flops.by_operator() + total_flops.append(flops.total()) + + logger.info("Flops table computed from only one input sample:\n" + flop_count_table(flops)) + logger.info( + "Average GFlops for each type of operators:\n" + + str([(k, v / (idx + 1) / 1e9) for k, v in counts.items()]) + ) + logger.info( + "Total GFlops: {:.1f}±{:.1f}".format(np.mean(total_flops) / 1e9, np.std(total_flops) / 1e9) + ) + + +def do_activation(cfg): + data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + model.eval() + + counts = Counter() + total_activations = [] + for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa + count = activation_count_operators(model, data) + counts += count + total_activations.append(sum(count.values())) + logger.info( + "(Million) Activations for Each Type of Operators:\n" + + str([(k, v / idx) for k, v in counts.items()]) + ) + logger.info( + "Total (Million) Activations: {}±{}".format( + np.mean(total_activations), np.std(total_activations) + ) + ) + + +def do_parameter(cfg): + model = build_model(cfg) + logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5)) + + +def do_structure(cfg): + model = build_model(cfg) + logger.info("Model Structure:\n" + str(model)) + + +if __name__ == "__main__": + parser = default_argument_parser( + epilog=""" +Examples: + +To show parameters of a model: +$ ./analyze_model.py --tasks parameter \\ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml + +Flops and activations are data-dependent, therefore inputs and model weights +are needed to count them: + +$ ./analyze_model.py --num-inputs 100 --tasks flop \\ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\ + MODEL.WEIGHTS /path/to/model.pkl +""" + ) + parser.add_argument( + "--tasks", + choices=["flop", "activation", "parameter", "structure"], + required=True, + nargs="+", + ) + parser.add_argument( + "-n", + "--num-inputs", + default=100, + type=int, + help="number of inputs used to compute statistics for flops/activations, " + "both are data dependent.", + ) + args = parser.parse_args() + assert not args.eval_only + assert args.num_gpus == 1 
+ + cfg = setup(args) + + for task in args.tasks: + { + "flop": do_flop, + "activation": do_activation, + "parameter": do_parameter, + "structure": do_structure, + }[task](cfg) diff --git a/src/sts/tools/benchmark.py b/src/sts/tools/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..a8075e57f720bf9c3a78321abba77a0d69776306 --- /dev/null +++ b/src/sts/tools/benchmark.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +""" +A script to benchmark builtin models. + +Note: this script has an extra dependency of psutil. +""" + +import itertools +import logging +import psutil +import torch +import tqdm +from fvcore.common.timer import Timer +from torch.nn.parallel import DistributedDataParallel + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import ( + DatasetFromList, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.engine import AMPTrainer, SimpleTrainer, default_argument_parser, hooks, launch +from detectron2.modeling import build_model +from detectron2.solver import build_optimizer +from detectron2.utils import comm +from detectron2.utils.collect_env import collect_env_info +from detectron2.utils.events import CommonMetricPrinter +from detectron2.utils.logger import setup_logger + +logger = logging.getLogger("detectron2") + + +def setup(args): + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway. + cfg.merge_from_list(args.opts) + cfg.freeze() + setup_logger(distributed_rank=comm.get_rank()) + return cfg + + +def RAM_msg(): + vram = psutil.virtual_memory() + return "RAM Usage: {:.2f}/{:.2f} GB".format( + (vram.total - vram.available) / 1024 ** 3, vram.total / 1024 ** 3 + ) + + +def benchmark_data(args): + cfg = setup(args) + + logger.info("After spawning " + RAM_msg()) + timer = Timer() + dataloader = build_detection_train_loader(cfg) + logger.info("Initialize loader using {} seconds.".format(timer.seconds())) + + timer.reset() + itr = iter(dataloader) + for i in range(10): # warmup + next(itr) + if i == 0: + startup_time = timer.seconds() + logger.info("Startup time: {} seconds".format(startup_time)) + timer = Timer() + max_iter = 1000 + for _ in tqdm.trange(max_iter): + next(itr) + logger.info( + "{} iters ({} images) in {} seconds.".format( + max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() + ) + ) + + # test for a few more rounds + for k in range(10): + logger.info(f"Iteration {k} " + RAM_msg()) + timer = Timer() + max_iter = 1000 + for _ in tqdm.trange(max_iter): + next(itr) + logger.info( + "{} iters ({} images) in {} seconds.".format( + max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() + ) + ) + + +def benchmark_train(args): + cfg = setup(args) + model = build_model(cfg) + logger.info("Model:\n{}".format(model)) + if comm.get_world_size() > 1: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False + ) + optimizer = build_optimizer(cfg, model) + checkpointer = DetectionCheckpointer(model, optimizer=optimizer) + checkpointer.load(cfg.MODEL.WEIGHTS) + + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 2 + data_loader = build_detection_train_loader(cfg) + dummy_data = list(itertools.islice(data_loader, 100)) + + def f(): + data = DatasetFromList(dummy_data, copy=False, serialize=False) + while True: + yield from data + + max_iter = 400 + trainer = (AMPTrainer 
if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(model, f(), optimizer) + trainer.register_hooks( + [hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])] + ) + trainer.train(1, max_iter) + + +@torch.no_grad() +def benchmark_eval(args): + cfg = setup(args) + model = build_model(cfg) + model.eval() + logger.info("Model:\n{}".format(model)) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 0 + data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) + dummy_data = DatasetFromList(list(itertools.islice(data_loader, 100)), copy=False) + + def f(): + while True: + yield from dummy_data + + for k in range(5): # warmup + model(dummy_data[k]) + + max_iter = 300 + timer = Timer() + with tqdm.tqdm(total=max_iter) as pbar: + for idx, d in enumerate(f()): + if idx == max_iter: + break + model(d) + pbar.update() + logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds())) + + +if __name__ == "__main__": + parser = default_argument_parser() + parser.add_argument("--task", choices=["train", "eval", "data"], required=True) + args = parser.parse_args() + assert not args.eval_only + + logger.info("Environment info:\n" + collect_env_info()) + if args.task == "data": + f = benchmark_data + print("Initial " + RAM_msg()) + elif args.task == "train": + """ + Note: training speed may not be representative. + The training cost of a R-CNN model varies with the content of the data + and the quality of the model. + """ + f = benchmark_train + elif args.task == "eval": + f = benchmark_eval + # only benchmark single-GPU inference. + assert args.num_gpus == 1 and args.num_machines == 1 + launch(f, args.num_gpus, args.num_machines, args.machine_rank, args.dist_url, args=(args,)) diff --git a/src/sts/tools/convert-torchvision-to-d2.py b/src/sts/tools/convert-torchvision-to-d2.py new file mode 100644 index 0000000000000000000000000000000000000000..4b827d960cca69657e98bd89a9aa5623a847099d --- /dev/null +++ b/src/sts/tools/convert-torchvision-to-d2.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. + +import pickle as pkl +import sys +import torch + +""" +Usage: + # download one of the ResNet{18,34,50,101,152} models from torchvision: + wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth + # run the conversion + ./convert-torchvision-to-d2.py r50.pth r50.pkl + + # Then, use r50.pkl with the following changes in config: + +MODEL: + WEIGHTS: "/path/to/r50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STRIDE_IN_1X1: False +INPUT: + FORMAT: "RGB" + + These models typically produce slightly worse results than the + pre-trained ResNets we use in official configs, which are the + original ResNet models released by MSRA. +""" + +if __name__ == "__main__": + input = sys.argv[1] + + obj = torch.load(input, map_location="cpu") + + newmodel = {} + for k in list(obj.keys()): + old_k = k + if "layer" not in k: + k = "stem." 
+ k + for t in [1, 2, 3, 4]: + k = k.replace("layer{}".format(t), "res{}".format(t + 1)) + for t in [1, 2, 3]: + k = k.replace("bn{}".format(t), "conv{}.norm".format(t)) + k = k.replace("downsample.0", "shortcut") + k = k.replace("downsample.1", "shortcut.norm") + print(old_k, "->", k) + newmodel[k] = obj.pop(old_k).detach().numpy() + + res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True} + + with open(sys.argv[2], "wb") as f: + pkl.dump(res, f) + if obj: + print("Unconverted keys:", obj.keys()) diff --git a/src/sts/tools/deploy/CMakeLists.txt b/src/sts/tools/deploy/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..18c56abbb8c4b0e1d44d715900ffc8bbd7a84f6b --- /dev/null +++ b/src/sts/tools/deploy/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# See https://pytorch.org/tutorials/advanced/cpp_frontend.html +cmake_minimum_required(VERSION 3.12 FATAL_ERROR) +project(caffe2_mask_rcnn) + +find_package(Torch REQUIRED) +find_package(gflags REQUIRED) # needed by caffe2 +find_package(OpenCV REQUIRED) +find_package(TorchVision REQUIRED) # needed by export-method=tracing/scripting + +add_executable(caffe2_mask_rcnn caffe2_mask_rcnn.cpp) +target_link_libraries( + caffe2_mask_rcnn + "${TORCH_LIBRARIES}" gflags glog protobuf ${OpenCV_LIBS}) +set_property(TARGET caffe2_mask_rcnn PROPERTY CXX_STANDARD 14) + + +add_executable(torchscript_traced_mask_rcnn torchscript_traced_mask_rcnn.cpp) +target_link_libraries( + torchscript_traced_mask_rcnn + -Wl,--no-as-needed TorchVision::TorchVision -Wl,--as-needed + "${TORCH_LIBRARIES}" ${OpenCV_LIBS}) +set_property(TARGET torchscript_traced_mask_rcnn PROPERTY CXX_STANDARD 14) diff --git a/src/sts/tools/deploy/README.md b/src/sts/tools/deploy/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f716a7b5c07d971c297a19ea50780f9f6369fc4f --- /dev/null +++ b/src/sts/tools/deploy/README.md @@ -0,0 +1,76 @@ +See [deployment tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html) +for some high-level background about deployment. + +This directory contains the following examples: + +1. An example script `export_model.py` (previously called `caffe2_converter.py`) + that exports a detectron2 model for deployment using different methods and formats. + +2. A few C++ examples that run inference with Mask R-CNN model in Caffe2/TorchScript format. + +## Build +All C++ examples depend on libtorch and OpenCV. Some require more dependencies: + +* Running caffe2-format models requires: + * libtorch built with caffe2 inside + * gflags, glog + * protobuf library that matches the version used by PyTorch (version defined in `include/caffe2/proto/caffe2.pb.h` of your PyTorch installation) + * MKL headers if caffe2 is built with MKL +* Running TorchScript-format models produced by `--export-method=caffe2_tracing` requires no other dependencies. +* Running TorchScript-format models produced by `--export-method=tracing` requires libtorchvision (C++ library of torchvision). + +We build all examples with one `CMakeLists.txt` that requires all the above dependencies. +Adjust it if you only need one example. +As a reference, +we provide a [Dockerfile](../../docker/deploy.Dockerfile) that +installs all the above dependencies and builds the C++ examples. + +## Use + +We show a few example commands to export and execute a Mask R-CNN model in C++. 
+ +* `export-method=caffe2_tracing, format=caffe2`: +``` +./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ + --output ./output --export-method caffe2_tracing --format caffe2 \ + MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ + MODEL.DEVICE cpu + +./build/caffe2_mask_rcnn --predict_net=output/model.pb --init_net=output/model_init.pb --input=input.jpg +``` + +* `export-method=caffe2_tracing, format=torchscript`: + +``` +./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ + --output ./output --export-method caffe2_tracing --format torchscript \ + MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ + MODEL.DEVICE cpu + +./build/torchscript_traced_mask_rcnn output/model.ts input.jpg caffe2_tracing +``` + +* `export-method=tracing, format=torchscript`: + +``` +# this example also tries GPU instead of CPU +./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ + --output ./output --export-method tracing --format torchscript \ + MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ + MODEL.DEVICE cuda + +./build/torchscript_traced_mask_rcnn output/model.ts input.jpg tracing +``` + +## Notes: + +1. Tracing/Caffe2-tracing requires valid weights & sample inputs. + Therefore the above commands require pre-trained models and [COCO dataset](https://detectron2.readthedocs.io/tutorials/builtin_datasets.html). + You can modify the script to obtain sample inputs in other ways instead of from COCO. + +2. `--run-eval` flag can be used under certain modes + (caffe2_tracing with caffe2 format, or tracing with torchscript format) + to evaluate the exported model using the dataset in the config. + It's recommended to always verify the accuracy in case the conversion is not successful. + Evaluation can be slow if model is exported to CPU or dataset is too large ("coco_2017_val_100" is a small subset of COCO useful for evaluation). + Caffe2 accuracy may be slightly different (within 0.1 AP) from original model due to numerical precisions between different runtime. diff --git a/src/sts/tools/deploy/caffe2_mask_rcnn.cpp b/src/sts/tools/deploy/caffe2_mask_rcnn.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e35c657b4d6f62b065da83111cb2bb70d424de97 --- /dev/null +++ b/src/sts/tools/deploy/caffe2_mask_rcnn.cpp @@ -0,0 +1,119 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +C10_DEFINE_string(predict_net, "", "path to model.pb"); +C10_DEFINE_string(init_net, "", "path to model_init.pb"); +C10_DEFINE_string(input, "", "path to input image"); + +using namespace std; +using namespace caffe2; + +int main(int argc, char** argv) { + caffe2::GlobalInit(&argc, &argv); + string predictNetPath = FLAGS_predict_net; + string initNetPath = FLAGS_init_net; + cv::Mat input = cv::imread(FLAGS_input, cv::IMREAD_COLOR); + + const int height = input.rows; + const int width = input.cols; + // FPN models require divisibility of 32 + assert(height % 32 == 0 && width % 32 == 0); + const int batch = 1; + const int channels = 3; + + // initialize Net and Workspace + caffe2::NetDef initNet_, predictNet_; + CAFFE_ENFORCE(ReadProtoFromFile(initNetPath, &initNet_)); + CAFFE_ENFORCE(ReadProtoFromFile(predictNetPath, &predictNet_)); + + Workspace workSpace; + for (auto& str : predictNet_.external_input()) { + workSpace.CreateBlob(str); + } + CAFFE_ENFORCE(workSpace.CreateNet(predictNet_)); + CAFFE_ENFORCE(workSpace.RunNetOnce(initNet_)); + + // setup inputs + auto data = BlobGetMutableTensor(workSpace.GetBlob("data"), caffe2::CPU); + data->Resize(batch, channels, height, width); + float* ptr = data->mutable_data(); + // HWC to CHW + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < height * width; ++i) { + ptr[c * height * width + i] = static_cast(input.data[3 * i + c]); + } + } + + auto im_info = + BlobGetMutableTensor(workSpace.GetBlob("im_info"), caffe2::CPU); + im_info->Resize(batch, 3); + float* im_info_ptr = im_info->mutable_data(); + im_info_ptr[0] = height; + im_info_ptr[1] = width; + im_info_ptr[2] = 1.0; + + // run the network + CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name())); + + // run 3 more times to benchmark + int N_benchmark = 3; + auto start_time = chrono::high_resolution_clock::now(); + for (int i = 0; i < N_benchmark; ++i) { + CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name())); + } + auto end_time = chrono::high_resolution_clock::now(); + auto ms = chrono::duration_cast(end_time - start_time) + .count(); + cout << "Latency (should vary with different inputs): " + << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl; + + // parse Mask R-CNN outputs + caffe2::Tensor bbox( + workSpace.GetBlob("bbox_nms")->Get(), caffe2::CPU); + caffe2::Tensor scores( + workSpace.GetBlob("score_nms")->Get(), caffe2::CPU); + caffe2::Tensor labels( + workSpace.GetBlob("class_nms")->Get(), caffe2::CPU); + caffe2::Tensor mask_probs( + workSpace.GetBlob("mask_fcn_probs")->Get(), caffe2::CPU); + cout << "bbox:" << bbox.DebugString() << endl; + cout << "scores:" << scores.DebugString() << endl; + cout << "labels:" << labels.DebugString() << endl; + cout << "mask_probs: " << mask_probs.DebugString() << endl; + + int num_instances = bbox.sizes()[0]; + for (int i = 0; i < num_instances; ++i) { + float score = scores.data()[i]; + if (score < 0.6) + continue; // skip them + + const float* box = bbox.data() + i * 4; + int label = labels.data()[i]; + + cout << "Prediction " << i << ", xyxy=("; + cout << box[0] << ", " << box[1] << ", " << box[2] << ", " << box[3] + << "); score=" << score << "; label=" << label << endl; + + const float* mask = mask_probs.data() + + i * mask_probs.size_from_dim(1) + label * mask_probs.size_from_dim(2); + + // save the 28x28 mask + cv::Mat cv_mask(28, 28, CV_32FC1); + memcpy(cv_mask.data, mask, 28 * 28 * sizeof(float)); + cv::imwrite("mask" + 
std::to_string(i) + ".png", cv_mask * 255.); + } + return 0; +} diff --git a/src/sts/tools/deploy/export_model.py b/src/sts/tools/deploy/export_model.py new file mode 100644 index 0000000000000000000000000000000000000000..520e4b8dc19eb330873c755e546109c6987f5b45 --- /dev/null +++ b/src/sts/tools/deploy/export_model.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +import argparse +import os +from typing import Dict, List, Tuple +import onnx +import torch +from torch import Tensor, nn + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import build_detection_test_loader +from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format +from detectron2.export import ( + Caffe2Tracer, + TracingAdapter, + add_export_config, + dump_torchscript_IR, + scripting_with_instances, +) +from detectron2.modeling import GeneralizedRCNN, RetinaNet, build_model +from detectron2.modeling.postprocessing import detector_postprocess +from detectron2.projects.point_rend import add_pointrend_config +from detectron2.structures import Boxes +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import setup_logger + + +def setup_cfg(args): + cfg = get_cfg() + # cuda context is initialized before creating dataloader, so we don't fork anymore + cfg.DATALOADER.NUM_WORKERS = 0 + cfg = add_export_config(cfg) + add_pointrend_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + return cfg + + +def export_caffe2_tracing(cfg, torch_model, inputs): + tracer = Caffe2Tracer(cfg, torch_model, inputs) + if args.format == "caffe2": + caffe2_model = tracer.export_caffe2() + caffe2_model.save_protobuf(args.output) + # draw the caffe2 graph + caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=inputs) + return caffe2_model + elif args.format == "onnx": + onnx_model = tracer.export_onnx() + onnx.save(onnx_model, os.path.join(args.output, "model.onnx")) + elif args.format == "torchscript": + ts_model = tracer.export_torchscript() + with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: + torch.jit.save(ts_model, f) + dump_torchscript_IR(ts_model, args.output) + + +# experimental. API not yet final +def export_scripting(torch_model): + assert TORCH_VERSION >= (1, 8) + fields = { + "proposal_boxes": Boxes, + "objectness_logits": Tensor, + "pred_boxes": Boxes, + "scores": Tensor, + "pred_classes": Tensor, + "pred_masks": Tensor, + "pred_keypoints": torch.Tensor, + "pred_keypoint_heatmaps": torch.Tensor, + } + assert args.format == "torchscript", "Scripting only supports torchscript format." + + class ScriptableAdapterBase(nn.Module): + # Use this adapter to workaround https://github.com/pytorch/pytorch/issues/46944 + # by not retuning instances but dicts. 
Otherwise the exported model is not deployable + def __init__(self): + super().__init__() + self.model = torch_model + self.eval() + + if isinstance(torch_model, GeneralizedRCNN): + + class ScriptableAdapter(ScriptableAdapterBase): + def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]: + instances = self.model.inference(inputs, do_postprocess=False) + return [i.get_fields() for i in instances] + + else: + + class ScriptableAdapter(ScriptableAdapterBase): + def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]: + instances = self.model(inputs) + return [i.get_fields() for i in instances] + + ts_model = scripting_with_instances(ScriptableAdapter(), fields) + with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: + torch.jit.save(ts_model, f) + dump_torchscript_IR(ts_model, args.output) + # TODO inference in Python now missing postprocessing glue code + return None + + +# experimental. API not yet final +def export_tracing(torch_model, inputs): + assert TORCH_VERSION >= (1, 8) + image = inputs[0]["image"] + inputs = [{"image": image}] # remove other unused keys + + if isinstance(torch_model, GeneralizedRCNN): + + def inference(model, inputs): + # use do_postprocess=False so it returns ROI mask + inst = model.inference(inputs, do_postprocess=False)[0] + return [{"instances": inst}] + + else: + inference = None # assume that we just call the model directly + + traceable_model = TracingAdapter(torch_model, inputs, inference) + + if args.format == "torchscript": + ts_model = torch.jit.trace(traceable_model, (image,)) + with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: + torch.jit.save(ts_model, f) + dump_torchscript_IR(ts_model, args.output) + elif args.format == "onnx": + # NOTE onnx export currently failing in pytorch + with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as f: + torch.onnx.export(traceable_model, (image,), f) + logger.info("Inputs schema: " + str(traceable_model.inputs_schema)) + logger.info("Outputs schema: " + str(traceable_model.outputs_schema)) + + if args.format != "torchscript": + return None + if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)): + return None + + def eval_wrapper(inputs): + """ + The exported model does not contain the final resize step, which is typically + unused in deployment but needed for evaluation. We add it manually here. 
+ """ + input = inputs[0] + instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"] + postprocessed = detector_postprocess(instances, input["height"], input["width"]) + return [{"instances": postprocessed}] + + return eval_wrapper + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Export a model for deployment.") + parser.add_argument( + "--format", + choices=["caffe2", "onnx", "torchscript"], + help="output format", + default="caffe2", + ) + parser.add_argument( + "--export-method", + choices=["caffe2_tracing", "tracing", "scripting"], + help="Method to export models", + default="caffe2_tracing", + ) + parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") + parser.add_argument("--run-eval", action="store_true") + parser.add_argument("--output", help="output directory for the converted model") + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + args = parser.parse_args() + logger = setup_logger() + logger.info("Command line arguments: " + str(args)) + PathManager.mkdirs(args.output) + # Disable respecialization on new shapes. Otherwise --run-eval will be slow + torch._C._jit_set_bailout_depth(1) + + cfg = setup_cfg(args) + + # create a torch model + torch_model = build_model(cfg) + DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) + torch_model.eval() + + # get a sample data + data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) + first_batch = next(iter(data_loader)) + + # convert and save model + if args.export_method == "caffe2_tracing": + exported_model = export_caffe2_tracing(cfg, torch_model, first_batch) + elif args.export_method == "scripting": + exported_model = export_scripting(torch_model) + elif args.export_method == "tracing": + exported_model = export_tracing(torch_model, first_batch) + + # run evaluation with the converted model + if args.run_eval: + assert exported_model is not None, ( + "Python inference is not yet implemented for " + f"export_method={args.export_method}, format={args.format}." + ) + logger.info("Running evaluation ... this takes a long time if you export to CPU.") + dataset = cfg.DATASETS.TEST[0] + data_loader = build_detection_test_loader(cfg, dataset) + # NOTE: hard-coded evaluator. change to the evaluator for your dataset + evaluator = COCOEvaluator(dataset, output_dir=args.output) + metrics = inference_on_dataset(exported_model, data_loader, evaluator) + print_csv_format(metrics) diff --git a/src/sts/tools/deploy/torchscript_traced_mask_rcnn.cpp b/src/sts/tools/deploy/torchscript_traced_mask_rcnn.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e07c55c9d0ef57ab668ce0ba23535b986e4d17d1 --- /dev/null +++ b/src/sts/tools/deploy/torchscript_traced_mask_rcnn.cpp @@ -0,0 +1,123 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// @lint-ignore-every CLANGTIDY + +#include +#include +#include + +#include +#include +#include +#include + +// only needed for export_method=tracing +#include // @oss-only +// @fb-only: #include + +using namespace std; + +c10::IValue get_caffe2_tracing_inputs(cv::Mat& img, c10::Device device) { + const int height = img.rows; + const int width = img.cols; + // FPN models require divisibility of 32. + // Tracing mode does padding inside the graph, but caffe2_tracing does not. 
+ assert(height % 32 == 0 && width % 32 == 0); + const int channels = 3; + + auto input = + torch::from_blob(img.data, {1, height, width, channels}, torch::kUInt8); + // NHWC to NCHW + input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous(); + + std::array im_info_data{height * 1.0f, width * 1.0f, 1.0f}; + auto im_info = + torch::from_blob(im_info_data.data(), {1, 3}).clone().to(device); + return std::make_tuple(input, im_info); +} + +c10::IValue get_tracing_inputs(cv::Mat& img, c10::Device device) { + const int height = img.rows; + const int width = img.cols; + const int channels = 3; + + auto input = + torch::from_blob(img.data, {height, width, channels}, torch::kUInt8); + // HWC to CHW + input = input.to(device, torch::kFloat).permute({2, 0, 1}).contiguous(); + return input; +} + +int main(int argc, const char* argv[]) { + if (argc != 4) { + cerr << R"xx( +Usage: + ./torchscript_traced_mask_rcnn model.ts input.jpg EXPORT_METHOD + + EXPORT_METHOD can be "tracing" or "caffe2_tracing". +)xx"; + return 1; + } + std::string image_file = argv[2]; + std::string export_method = argv[3]; + assert(export_method == "caffe2_tracing" || export_method == "tracing"); + bool is_caffe2 = export_method == "caffe2_tracing"; + + torch::jit::getBailoutDepth() = 1; + torch::autograd::AutoGradMode guard(false); + auto module = torch::jit::load(argv[1]); + + assert(module.buffers().size() > 0); + // Assume that the entire model is on the same device. + // We just put input to this device. + auto device = (*begin(module.buffers())).device(); + + cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR); + auto inputs = is_caffe2 ? get_caffe2_tracing_inputs(input_img, device) + : get_tracing_inputs(input_img, device); + + // run the network + auto output = module.forward({inputs}); + if (device.is_cuda()) + c10::cuda::getCurrentCUDAStream().synchronize(); + + // run 3 more times to benchmark + int N_benchmark = 3, N_warmup = 1; + auto start_time = chrono::high_resolution_clock::now(); + for (int i = 0; i < N_benchmark + N_warmup; ++i) { + if (i == N_warmup) + start_time = chrono::high_resolution_clock::now(); + output = module.forward({inputs}); + if (device.is_cuda()) + c10::cuda::getCurrentCUDAStream().synchronize(); + } + auto end_time = chrono::high_resolution_clock::now(); + auto ms = chrono::duration_cast(end_time - start_time) + .count(); + cout << "Latency (should vary with different inputs): " + << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl; + + auto outputs = output.toTuple()->elements(); + cout << "Number of output tensors: " << outputs.size() << endl; + at::Tensor bbox, pred_classes, pred_masks, scores; + // parse Mask R-CNN outputs + if (is_caffe2) { + bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(), + pred_classes = outputs[2].toTensor(), pred_masks = outputs[3].toTensor(); + } else { + bbox = outputs[0].toTensor(), pred_classes = outputs[1].toTensor(), + pred_masks = outputs[2].toTensor(), scores = outputs[3].toTensor(); + // outputs[-1] is image_size, others fields ordered by their field name in + // Instances + } + + cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl; + cout << "scores: " << scores.toString() << " " << scores.sizes() << endl; + cout << "pred_classes: " << pred_classes.toString() << " " + << pred_classes.sizes() << endl; + cout << "pred_masks: " << pred_masks.toString() << " " << pred_masks.sizes() + << endl; + + int num_instances = bbox.sizes()[0]; + cout << bbox << endl; + return 0; +} diff --git 
a/src/sts/tools/plain_train_net.py b/src/sts/tools/plain_train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..6c08f877b9bc3a054802caeafb41e4625adba1f4 --- /dev/null +++ b/src/sts/tools/plain_train_net.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Detectron2 training script with a plain training loop. + +This script reads a given config file and runs the training or evaluation. +It is an entry point that is able to train standard models in detectron2. + +In order to let one script support training of many models, +this script contains logic that are specific to these built-in models and therefore +may not be suitable for your own project. +For example, your research project perhaps only needs a single "evaluator". + +Therefore, we recommend you to use detectron2 as a library and take +this file as an example of how to use the library. +You may want to write your own script with your datasets and other customizations. + +Compared to "train_net.py", this script supports fewer default features. +It also includes fewer abstraction, therefore is easier to add custom logic. +""" + +import logging +import os +from collections import OrderedDict +import torch +from torch.nn.parallel import DistributedDataParallel + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer +from detectron2.config import get_cfg +from detectron2.data import ( + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.engine import default_argument_parser, default_setup, default_writers, launch +from detectron2.evaluation import ( + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator, + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + inference_on_dataset, + print_csv_format, +) +from detectron2.modeling import build_model +from detectron2.solver import build_lr_scheduler, build_optimizer +from detectron2.utils.events import EventStorage + +logger = logging.getLogger("detectron2") + + +def get_evaluator(cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." 
+ return CityscapesSemSegEvaluator(dataset_name) + if evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + if evaluator_type == "lvis": + return LVISEvaluator(dataset_name, cfg, True, output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type) + ) + if len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + +def do_test(cfg, model): + results = OrderedDict() + for dataset_name in cfg.DATASETS.TEST: + data_loader = build_detection_test_loader(cfg, dataset_name) + evaluator = get_evaluator( + cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) + ) + results_i = inference_on_dataset(model, data_loader, evaluator) + results[dataset_name] = results_i + if comm.is_main_process(): + logger.info("Evaluation results for {} in csv format:".format(dataset_name)) + print_csv_format(results_i) + if len(results) == 1: + results = list(results.values())[0] + return results + + +def do_train(cfg, model, resume=False): + model.train() + optimizer = build_optimizer(cfg, model) + scheduler = build_lr_scheduler(cfg, optimizer) + + checkpointer = DetectionCheckpointer( + model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler + ) + start_iter = ( + checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1 + ) + max_iter = cfg.SOLVER.MAX_ITER + + periodic_checkpointer = PeriodicCheckpointer( + checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter + ) + + writers = default_writers(cfg.OUTPUT_DIR, max_iter) if comm.is_main_process() else [] + + # compared to "train_net.py", we do not support accurate timing and + # precise BN here, because they are not trivial to implement in a small training loop + data_loader = build_detection_train_loader(cfg) + logger.info("Starting training from iteration {}".format(start_iter)) + with EventStorage(start_iter) as storage: + for data, iteration in zip(data_loader, range(start_iter, max_iter)): + storage.iter = iteration + + loss_dict = model(data) + losses = sum(loss_dict.values()) + assert torch.isfinite(losses).all(), loss_dict + + loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + if comm.is_main_process(): + storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced) + + optimizer.zero_grad() + losses.backward() + optimizer.step() + storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False) + scheduler.step() + + if ( + cfg.TEST.EVAL_PERIOD > 0 + and (iteration + 1) % cfg.TEST.EVAL_PERIOD == 0 + and iteration != max_iter - 1 + ): + do_test(cfg, model) + # Compared to "train_net.py", the test results are not dumped to EventStorage + comm.synchronize() + + if iteration - start_iter > 5 and ( + (iteration + 1) % 20 == 0 or iteration == max_iter - 1 + ): + for writer in writers: + writer.write() + periodic_checkpointer.step(iteration) + + +def setup(args): + """ + Create configs and perform basic setups. 
+ """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup( + cfg, args + ) # if you don't like any of the default setup, write your own setup code + return cfg + + +def main(args): + cfg = setup(args) + + model = build_model(cfg) + logger.info("Model:\n{}".format(model)) + if args.eval_only: + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + return do_test(cfg, model) + + distributed = comm.get_world_size() > 1 + if distributed: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False + ) + + do_train(cfg, model, resume=args.resume) + return do_test(cfg, model) + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/src/sts/tools/train_net.py b/src/sts/tools/train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..609f7e5d25c012fa6985178e738b47517dc544b3 --- /dev/null +++ b/src/sts/tools/train_net.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Detection Training Script. + +This scripts reads a given config file and runs the training or evaluation. +It is an entry point that is made to train standard models in detectron2. + +In order to let one script support training of many models, +this script contains logic that are specific to these built-in models and therefore +may not be suitable for your own project. +For example, your research project perhaps only needs a single "evaluator". + +Therefore, we recommend you to use detectron2 as an library and take +this file as an example of how to use the library. +You may want to write your own script with your datasets and other customizations. +""" + +import logging +import os +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import MetadataCatalog +from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import ( + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator, + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + verify_results, +) +from detectron2.modeling import GeneralizedRCNNWithTTA + + +class Trainer(DefaultTrainer): + """ + We use the "DefaultTrainer" which contains pre-defined default logic for + standard training workflow. They may not work for you, especially if you + are working on a new research project. In that case you can write your + own training loop. You can use "tools/plain_train_net.py" as an example. + """ + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. 
+ """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesSemSegEvaluator(dataset_name) + elif evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + elif evaluator_type == "lvis": + return LVISEvaluator(dataset_name, output_dir=output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format( + dataset_name, evaluator_type + ) + ) + elif len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + @classmethod + def test_with_TTA(cls, cfg, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. + logger.info("Running inference with test-time augmentation ...") + model = GeneralizedRCNNWithTTA(cfg, model) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res + + +def setup(args): + """ + Create configs and perform basic setups. + """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + cfg = setup(args) + + if args.eval_only: + model = Trainer.build_model(cfg) + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + """ + If you'd like to do anything fancier than the standard training logic, + consider writing your own training loop (see plain_train_net.py) or + subclassing the trainer. 
+ """ + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/src/sts/tools/visualize_data.py b/src/sts/tools/visualize_data.py new file mode 100644 index 0000000000000000000000000000000000000000..fd0ba8347bfd34fc8fac5ffef9aee10915ad1820 --- /dev/null +++ b/src/sts/tools/visualize_data.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +import argparse +import os +from itertools import chain +import cv2 +import tqdm + +from detectron2.config import get_cfg +from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader +from detectron2.data import detection_utils as utils +from detectron2.data.build import filter_images_with_few_keypoints +from detectron2.utils.logger import setup_logger +from detectron2.utils.visualizer import Visualizer + + +def setup(args): + cfg = get_cfg() + if args.config_file: + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.DATALOADER.NUM_WORKERS = 0 + cfg.freeze() + return cfg + + +def parse_args(in_args=None): + parser = argparse.ArgumentParser(description="Visualize ground-truth data") + parser.add_argument( + "--source", + choices=["annotation", "dataloader"], + required=True, + help="visualize the annotations or the data loader (with pre-processing)", + ) + parser.add_argument("--config-file", metavar="FILE", help="path to config file") + parser.add_argument("--output-dir", default="./", help="path to output directory") + parser.add_argument("--show", action="store_true", help="show output in a window") + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + return parser.parse_args(in_args) + + +if __name__ == "__main__": + args = parse_args() + logger = setup_logger() + logger.info("Arguments: " + str(args)) + cfg = setup(args) + + dirname = args.output_dir + os.makedirs(dirname, exist_ok=True) + metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) + + def output(vis, fname): + if args.show: + print(fname) + cv2.imshow("window", vis.get_image()[:, :, ::-1]) + cv2.waitKey() + else: + filepath = os.path.join(dirname, fname) + print("Saving to {} ...".format(filepath)) + vis.save(filepath) + + scale = 1.0 + if args.source == "dataloader": + train_data_loader = build_detection_train_loader(cfg) + for batch in train_data_loader: + for per_image in batch: + # Pytorch tensor is in (C, H, W) format + img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy() + img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT) + + visualizer = Visualizer(img, metadata=metadata, scale=scale) + target_fields = per_image["instances"].get_fields() + labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] + vis = visualizer.overlay_instances( + labels=labels, + boxes=target_fields.get("gt_boxes", None), + masks=target_fields.get("gt_masks", None), + keypoints=target_fields.get("gt_keypoints", None), + ) + output(vis, str(per_image["image_id"]) + ".jpg") + else: + dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in 
cfg.DATASETS.TRAIN])) + if cfg.MODEL.KEYPOINT_ON: + dicts = filter_images_with_few_keypoints(dicts, 1) + for dic in tqdm.tqdm(dicts): + img = utils.read_image(dic["file_name"], "RGB") + visualizer = Visualizer(img, metadata=metadata, scale=scale) + vis = visualizer.draw_dataset_dict(dic) + output(vis, os.path.basename(dic["file_name"])) diff --git a/src/sts/tools/visualize_json_results.py b/src/sts/tools/visualize_json_results.py new file mode 100644 index 0000000000000000000000000000000000000000..472190e0b3b38b55773795915badbb5bc4599d42 --- /dev/null +++ b/src/sts/tools/visualize_json_results.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. + +import argparse +import json +import numpy as np +import os +from collections import defaultdict +import cv2 +import tqdm + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import Boxes, BoxMode, Instances +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import setup_logger +from detectron2.utils.visualizer import Visualizer + + +def create_instances(predictions, image_size): + ret = Instances(image_size) + + score = np.asarray([x["score"] for x in predictions]) + chosen = (score > args.conf_threshold).nonzero()[0] + score = score[chosen] + bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4) + bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + + labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen]) + + ret.scores = score + ret.pred_boxes = Boxes(bbox) + ret.pred_classes = labels + + try: + ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] + except KeyError: + pass + return ret + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="A script that visualizes the json predictions from COCO or LVIS dataset." 
+ ) + parser.add_argument("--input", required=True, help="JSON file produced by the model") + parser.add_argument("--output", required=True, help="output directory") + parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val") + parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold") + args = parser.parse_args() + + logger = setup_logger() + + with PathManager.open(args.input, "r") as f: + predictions = json.load(f) + + pred_by_image = defaultdict(list) + for p in predictions: + pred_by_image[p["image_id"]].append(p) + + dicts = list(DatasetCatalog.get(args.dataset)) + metadata = MetadataCatalog.get(args.dataset) + if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): + + def dataset_id_map(ds_id): + return metadata.thing_dataset_id_to_contiguous_id[ds_id] + + elif "lvis" in args.dataset: + # LVIS results are in the same format as COCO results, but have a different + # mapping from dataset category id to contiguous category id in [0, #categories - 1] + def dataset_id_map(ds_id): + return ds_id - 1 + + else: + raise ValueError("Unsupported dataset: {}".format(args.dataset)) + + os.makedirs(args.output, exist_ok=True) + + for dic in tqdm.tqdm(dicts): + img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] + basename = os.path.basename(dic["file_name"]) + + predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) + vis = Visualizer(img, metadata) + vis_pred = vis.draw_instance_predictions(predictions).get_image() + + vis = Visualizer(img, metadata) + vis_gt = vis.draw_dataset_dict(dic).get_image() + + concat = np.concatenate((vis_pred, vis_gt), axis=1) + cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1]) diff --git a/src/tracker/.gitignore b/src/tracker/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..000c1d7066ecddfbed53fcc782e2c778deec4689 --- /dev/null +++ b/src/tracker/.gitignore @@ -0,0 +1,120 @@ +checkpoint/* +lightning_logs/* +outputs/* +ids/* +legacy/* +.DS_Store +debug/* +*.DS_Store +*.mat +preds/* +*.h5 +*.mp4 +*.jpg +*.png +*.checkpoint +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Pycharm +.idea + +sts/build \ No newline at end of file diff --git a/src/tracker/MANIFEST.in b/src/tracker/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..e149bfc73fbf486d2d7d926b43da315e495bd5ca --- /dev/null +++ b/src/tracker/MANIFEST.in @@ -0,0 +1,3 @@ +include README.md +include LICENSE +include requirements.txt \ No newline at end of file diff --git a/src/tracker/README.md b/src/tracker/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4436f70589b27b6cd8682532d19f8b83ef814c2b --- /dev/null +++ b/src/tracker/README.md @@ -0,0 +1,38 @@ +# Signboard Tracking + +## Installation + + +### Training + +``` + +``` + +### Available + +[Checkpoint](https://drive.google.com/drive/folders/1byNi881swx8RhY1caUxZQ_D0_7rmnY_3?usp=drive_link) + +## Usage + +### Signboard Segment + +Mask RCNN: [DaoLQ](./signboard_segment/) + +``` + +``` + +### Scene Text Recognition + +SwinTextSpotter: [KienVS](./sts/README.md) + +``` + +``` + +## Copyright + +``` +FIMO @ 2023 +``` \ No newline at end of file diff --git a/src/tracker/README_deepsort.md b/src/tracker/README_deepsort.md new file mode 100644 index 0000000000000000000000000000000000000000..177bab04a834eb111b56f96e31149081e7a70f46 --- /dev/null +++ b/src/tracker/README_deepsort.md @@ -0,0 +1,141 @@ +# Deep SORT + +## Introduction + +This repository contains code for *Simple Online and Realtime Tracking with a Deep Association Metric* (Deep SORT). +We extend the original [SORT](https://github.com/abewley/sort) algorithm to +integrate appearance information based on a deep appearance descriptor. +See the [arXiv preprint](https://arxiv.org/abs/1703.07402) for more information. + +## Dependencies + +The code is compatible with Python 2.7 and 3. The following dependencies are +needed to run the tracker: + +* NumPy +* sklearn +* OpenCV + +Additionally, feature generation requires TensorFlow (>= 1.0). + +## Installation + +First, clone the repository: +``` +git clone https://github.com/nwojke/deep_sort.git +``` +Then, download pre-generated detections and the CNN checkpoint file from +[here](https://drive.google.com/open?id=18fKzfqnqhqW3s9zwsCbnVJ5XF2JFeqMp). + +*NOTE:* The candidate object locations of our pre-generated detections are +taken from the following paper: +``` +F. Yu, W. Li, Q. Li, Y. Liu, X. Shi, J. Yan. POI: Multiple Object Tracking with +High Performance Detection and Appearance Feature. In BMTT, SenseTime Group +Limited, 2016. +``` +We have replaced the appearance descriptor with a custom deep convolutional +neural network (see below). + +## Running the tracker + +The following example starts the tracker on one of the +[MOT16 benchmark](https://motchallenge.net/data/MOT16/) +sequences. 
+We assume resources have been extracted to the repository root directory and +the MOT16 benchmark data is in `./MOT16`: +``` +python deep_sort_app.py \ + --sequence_dir=./MOT16/test/MOT16-06 \ + --detection_file=./resources/detections/MOT16_POI_test/MOT16-06.npy \ + --min_confidence=0.3 \ + --nn_budget=100 \ + --display=True +``` +Check `python deep_sort_app.py -h` for an overview of available options. +There are also scripts in the repository to visualize results, generate videos, +and evaluate the MOT challenge benchmark. + +## Generating detections + +Beside the main tracking application, this repository contains a script to +generate features for person re-identification, suitable to compare the visual +appearance of pedestrian bounding boxes using cosine similarity. +The following example generates these features from standard MOT challenge +detections. Again, we assume resources have been extracted to the repository +root directory and MOT16 data is in `./MOT16`: +``` +python tools/generate_detections.py \ + --model=resources/networks/mars-small128.pb \ + --mot_dir=./MOT16/train \ + --output_dir=./resources/detections/MOT16_train +``` +The model has been generated with TensorFlow 1.5. If you run into +incompatibility, re-export the frozen inference graph to obtain a new +`mars-small128.pb` that is compatible with your version: +``` +python tools/freeze_model.py +``` +The ``generate_detections.py`` stores for each sequence of the MOT16 dataset +a separate binary file in NumPy native format. Each file contains an array of +shape `Nx138`, where N is the number of detections in the corresponding MOT +sequence. The first 10 columns of this array contain the raw MOT detection +copied over from the input file. The remaining 128 columns store the appearance +descriptor. The files generated by this command can be used as input for the +`deep_sort_app.py`. + +**NOTE**: If ``python tools/generate_detections.py`` raises a TensorFlow error, +try passing an absolute path to the ``--model`` argument. This might help in +some cases. + +## Training the model + +To train the deep association metric model we used a novel [cosine metric learning](https://github.com/nwojke/cosine_metric_learning) approach which is provided as a separate repository. + +## Highlevel overview of source files + +In the top-level directory are executable scripts to execute, evaluate, and +visualize the tracker. The main entry point is in `deep_sort_app.py`. +This file runs the tracker on a MOTChallenge sequence. + +In package `deep_sort` is the main tracking code: + +* `detection.py`: Detection base class. +* `kalman_filter.py`: A Kalman filter implementation and concrete + parametrization for image space filtering. +* `linear_assignment.py`: This module contains code for min cost matching and + the matching cascade. +* `iou_matching.py`: This module contains the IOU matching metric. +* `nn_matching.py`: A module for a nearest neighbor matching metric. +* `track.py`: The track class contains single-target track data such as Kalman + state, number of hits, misses, hit streak, associated feature vectors, etc. +* `tracker.py`: This is the multi-target tracker class. + +The `deep_sort_app.py` expects detections in a custom format, stored in .npy +files. These can be computed from MOTChallenge detections using +`generate_detections.py`. We also provide +[pre-generated detections](https://drive.google.com/open?id=1VVqtL0klSUvLnmBKS89il1EKC3IxUBVK). 
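For reference, the `Nx138` layout described above can be inspected directly with NumPy. The following is a minimal, illustrative sketch (the file path is a placeholder for one of the pre-generated detection files); it splits a detection array into its raw MOT columns and its 128-dimensional appearance descriptors:

```
import numpy as np

# Placeholder path: one pre-generated detection file in the format described above.
detections = np.load("resources/detections/MOT16_train/MOT16-02.npy")

# Columns 0-9 hold the raw MOT detection row (frame index, track id placeholder,
# bbox left/top/width/height, confidence, ...); columns 10-137 hold the
# appearance descriptor computed by tools/generate_detections.py.
mot_rows, features = detections[:, :10], detections[:, 10:]
assert features.shape[1] == 128

# Example: boxes and descriptors for frame 1 with confidence >= 0.3.
mask = (mot_rows[:, 0] == 1) & (mot_rows[:, 6] >= 0.3)
boxes_tlwh = mot_rows[mask, 2:6]      # (left, top, width, height)
frame_features = features[mask]
print(boxes_tlwh.shape, frame_features.shape)
```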
+ +## Citing DeepSORT + +If you find this repo useful in your research, please consider citing the following papers: + + @inproceedings{Wojke2017simple, + title={Simple Online and Realtime Tracking with a Deep Association Metric}, + author={Wojke, Nicolai and Bewley, Alex and Paulus, Dietrich}, + booktitle={2017 IEEE International Conference on Image Processing (ICIP)}, + year={2017}, + pages={3645--3649}, + organization={IEEE}, + doi={10.1109/ICIP.2017.8296962} + } + + @inproceedings{Wojke2018deep, + title={Deep Cosine Metric Learning for Person Re-identification}, + author={Wojke, Nicolai and Bewley, Alex}, + booktitle={2018 IEEE Winter Conference on Applications of Computer Vision (WACV)}, + year={2018}, + pages={748--756}, + organization={IEEE}, + doi={10.1109/WACV.2018.00087} + } \ No newline at end of file diff --git a/src/tracker/README_maskrcnn.md b/src/tracker/README_maskrcnn.md new file mode 100644 index 0000000000000000000000000000000000000000..55675e4dd8075457e2e2159e6b78a647f4231ebd --- /dev/null +++ b/src/tracker/README_maskrcnn.md @@ -0,0 +1,241 @@ +# Mask R-CNN for Object Detection and Segmentation + +This is an implementation of [Mask R-CNN](https://arxiv.org/abs/1703.06870) on Python 3, Keras, and TensorFlow. The model generates bounding boxes and segmentation masks for each instance of an object in the image. It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone. + +![Instance Segmentation Sample](assets/street.png) + +The repository includes: +* Source code of Mask R-CNN built on FPN and ResNet101. +* Training code for MS COCO +* Pre-trained weights for MS COCO +* Jupyter notebooks to visualize the detection pipeline at every step +* ParallelModel class for multi-GPU training +* Evaluation on MS COCO metrics (AP) +* Example of training on your own dataset + + +The code is documented and designed to be easy to extend. If you use it in your research, please consider citing this repository (bibtex below). If you work on 3D vision, you might find our recently released [Matterport3D](https://matterport.com/blog/2017/09/20/announcing-matterport3d-research-dataset/) dataset useful as well. +This dataset was created from 3D-reconstructed spaces captured by our customers who agreed to make them publicly available for academic use. You can see more examples [here](https://matterport.com/gallery/). + +# Getting Started +* [demo.ipynb](samples/demo.ipynb) Is the easiest way to start. It shows an example of using a model pre-trained on MS COCO to segment objects in your own images. +It includes code to run object detection and instance segmentation on arbitrary images. + +* [train_shapes.ipynb](samples/shapes/train_shapes.ipynb) shows how to train Mask R-CNN on your own dataset. This notebook introduces a toy dataset (Shapes) to demonstrate training on a new dataset. + +* ([model.py](mrcnn/model.py), [utils.py](mrcnn/utils.py), [config.py](mrcnn/config.py)): These files contain the main Mask RCNN implementation. + + +* [inspect_data.ipynb](samples/coco/inspect_data.ipynb). This notebook visualizes the different pre-processing steps +to prepare the training data. + +* [inspect_model.ipynb](samples/coco/inspect_model.ipynb) This notebook goes in depth into the steps performed to detect and segment objects. It provides visualizations of every step of the pipeline. + +* [inspect_weights.ipynb](samples/coco/inspect_weights.ipynb) +This notebooks inspects the weights of a trained model and looks for anomalies and odd patterns. 
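
For orientation, single-image inference with a COCO pre-trained model (the workflow `demo.ipynb` walks through) roughly follows the pattern below. This is a minimal sketch, assuming the `mrcnn` package from this repository is installed and `mask_rcnn_coco.h5` has been downloaded as described under Installation; the config values shown are illustrative.

```python
import skimage.io
import mrcnn.model as modellib
from mrcnn.config import Config

class InferenceConfig(Config):
    # Single-image inference; 80 COCO classes plus background.
    NAME = "coco_inference"
    NUM_CLASSES = 1 + 80
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

model = modellib.MaskRCNN(mode="inference", config=InferenceConfig(),
                          model_dir="logs")
model.load_weights("mask_rcnn_coco.h5", by_name=True)

image = skimage.io.imread("your_image.jpg")   # any RGB image
r = model.detect([image], verbose=0)[0]
# r["rois"], r["class_ids"], r["scores"], r["masks"] hold boxes, labels,
# confidences, and per-instance binary masks respectively.
```
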
+ + +# Step by Step Detection +To help with debugging and understanding the model, there are 3 notebooks +([inspect_data.ipynb](samples/coco/inspect_data.ipynb), [inspect_model.ipynb](samples/coco/inspect_model.ipynb), +[inspect_weights.ipynb](samples/coco/inspect_weights.ipynb)) that provide a lot of visualizations and allow running the model step by step to inspect the output at each point. Here are a few examples: + + + +## 1. Anchor sorting and filtering +Visualizes every step of the first stage Region Proposal Network and displays positive and negative anchors along with anchor box refinement. +![](assets/detection_anchors.png) + +## 2. Bounding Box Refinement +This is an example of final detection boxes (dotted lines) and the refinement applied to them (solid lines) in the second stage. +![](assets/detection_refinement.png) + +## 3. Mask Generation +Examples of generated masks. These then get scaled and placed on the image in the right location. + +![](assets/detection_masks.png) + +## 4.Layer activations +Often it's useful to inspect the activations at different layers to look for signs of trouble (all zeros or random noise). + +![](assets/detection_activations.png) + +## 5. Weight Histograms +Another useful debugging tool is to inspect the weight histograms. These are included in the inspect_weights.ipynb notebook. + +![](assets/detection_histograms.png) + +## 6. Logging to TensorBoard +TensorBoard is another great debugging and visualization tool. The model is configured to log losses and save weights at the end of every epoch. + +![](assets/detection_tensorboard.png) + +## 6. Composing the different pieces into a final result + +![](assets/detection_final.png) + + +# Training on MS COCO +We're providing pre-trained weights for MS COCO to make it easier to start. You can +use those weights as a starting point to train your own variation on the network. +Training and evaluation code is in `samples/coco/coco.py`. You can import this +module in Jupyter notebook (see the provided notebooks for examples) or you +can run it directly from the command line as such: + +``` +# Train a new model starting from pre-trained COCO weights +python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=coco + +# Train a new model starting from ImageNet weights +python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=imagenet + +# Continue training a model that you had trained earlier +python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 + +# Continue training the last model you trained. This will find +# the last trained weights in the model directory. +python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=last +``` + +You can also run the COCO evaluation code with: +``` +# Run COCO evaluation on the last trained model +python3 samples/coco/coco.py evaluate --dataset=/path/to/coco/ --model=last +``` + +The training schedule, learning rate, and other parameters should be set in `samples/coco/coco.py`. + + +# Training on Your Own Dataset + +Start by reading this [blog post about the balloon color splash sample](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46). It covers the process starting from annotating images to training to using the results in a sample application. + +In summary, to train the model on your own dataset you'll need to extend two classes: + +```Config``` +This class contains the default configuration. 
Subclass it and modify the attributes you need to change. + +```Dataset``` +This class provides a consistent way to work with any dataset. +It allows you to use new datasets for training without having to change +the code of the model. It also supports loading multiple datasets at the +same time, which is useful if the objects you want to detect are not +all available in one dataset. + +See examples in `samples/shapes/train_shapes.ipynb`, `samples/coco/coco.py`, `samples/balloon/balloon.py`, and `samples/nucleus/nucleus.py`. + +## Differences from the Official Paper +This implementation follows the Mask RCNN paper for the most part, but there are a few cases where we deviated in favor of code simplicity and generalization. These are some of the differences we're aware of. If you encounter other differences, please do let us know. + +* **Image Resizing:** To support training multiple images per batch we resize all images to the same size. For example, 1024x1024px on MS COCO. We preserve the aspect ratio, so if an image is not square we pad it with zeros. In the paper the resizing is done such that the smallest side is 800px and the largest is trimmed at 1000px. +* **Bounding Boxes**: Some datasets provide bounding boxes and some provide masks only. To support training on multiple datasets we opted to ignore the bounding boxes that come with the dataset and generate them on the fly instead. We pick the smallest box that encapsulates all the pixels of the mask as the bounding box. This simplifies the implementation and also makes it easy to apply image augmentations that would otherwise be harder to apply to bounding boxes, such as image rotation. + + To validate this approach, we compared our computed bounding boxes to those provided by the COCO dataset. +We found that ~2% of bounding boxes differed by 1px or more, ~0.05% differed by 5px or more, +and only 0.01% differed by 10px or more. + +* **Learning Rate:** The paper uses a learning rate of 0.02, but we found that to be +too high, and often causes the weights to explode, especially when using a small batch +size. It might be related to differences between how Caffe and TensorFlow compute +gradients (sum vs mean across batches and GPUs). Or, maybe the official model uses gradient +clipping to avoid this issue. We do use gradient clipping, but don't set it too aggressively. +We found that smaller learning rates converge faster anyway so we go with that. + +## Citation +Use this bibtex to cite this repository: +``` +@misc{matterport_maskrcnn_2017, + title={Mask R-CNN for object detection and instance segmentation on Keras and TensorFlow}, + author={Waleed Abdulla}, + year={2017}, + publisher={Github}, + journal={GitHub repository}, + howpublished={\url{https://github.com/matterport/Mask_RCNN}}, +} +``` + +## Contributing +Contributions to this repository are welcome. Examples of things you can contribute: +* Speed Improvements. Like re-writing some Python code in TensorFlow or Cython. +* Training on other datasets. +* Accuracy Improvements. +* Visualizations and examples. + +You can also [join our team](https://matterport.com/careers/) and help us build even more projects like this one. + +## Requirements +Python 3.4, TensorFlow 1.3, Keras 2.0.8 and other common packages listed in `requirements.txt`. 
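
As a concrete illustration of the two extension points described under "Training on Your Own Dataset" above, the sketch below subclasses both `Config` and `Dataset`. The "signboard" class name and the annotation fields are placeholders for this example, not part of the repository.

```python
import numpy as np
from mrcnn.config import Config
from mrcnn import utils

class SignboardConfig(Config):
    NAME = "signboard"
    NUM_CLASSES = 1 + 1        # background + one foreground class
    IMAGES_PER_GPU = 2
    STEPS_PER_EPOCH = 100

class SignboardDataset(utils.Dataset):
    def load_signboards(self, annotations):
        self.add_class("signboard", 1, "signboard")
        for i, ann in enumerate(annotations):
            # Each annotation is assumed to carry a path, image size,
            # and polygon outlines for every instance.
            self.add_image("signboard", image_id=i, path=ann["path"],
                           width=ann["width"], height=ann["height"],
                           polygons=ann["polygons"])

    def load_mask(self, image_id):
        info = self.image_info[image_id]
        count = len(info["polygons"])
        masks = np.zeros((info["height"], info["width"], count), dtype=bool)
        # Rasterizing each polygon into its mask channel is dataset-specific
        # and omitted here; every instance gets class id 1 ("signboard").
        class_ids = np.ones(count, dtype=np.int32)
        return masks, class_ids
```

Training with such subclasses then follows the same pattern as the shapes and balloon samples referenced above (prepare train/validation datasets, build a `MaskRCNN` in `"training"` mode, and call its training routine with the config's learning rate).
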
+ +### MS COCO Requirements: +To train or test on MS COCO, you'll also need: +* pycocotools (installation instructions below) +* [MS COCO Dataset](http://cocodataset.org/#home) +* Download the 5K [minival](https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0) + and the 35K [validation-minus-minival](https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0) + subsets. More details in the original [Faster R-CNN implementation](https://github.com/rbgirshick/py-faster-rcnn/blob/master/data/README.md). + +If you use Docker, the code has been verified to work on +[this Docker container](https://hub.docker.com/r/waleedka/modern-deep-learning/). + + +## Installation +1. Clone this repository +2. Install dependencies + ```bash + pip3 install -r requirements.txt + ``` +3. Run setup from the repository root directory + ```bash + python3 setup.py install + ``` +3. Download pre-trained COCO weights (mask_rcnn_coco.h5) from the [releases page](https://github.com/matterport/Mask_RCNN/releases). +4. (Optional) To train or test on MS COCO install `pycocotools` from one of these repos. They are forks of the original pycocotools with fixes for Python3 and Windows (the official repo doesn't seem to be active anymore). + + * Linux: https://github.com/waleedka/coco + * Windows: https://github.com/philferriere/cocoapi. + You must have the Visual C++ 2015 build tools on your path (see the repo for additional details) + +# Projects Using this Model +If you extend this model to other datasets or build projects that use it, we'd love to hear from you. + +### [4K Video Demo](https://www.youtube.com/watch?v=OOT3UIXZztE) by Karol Majek. +[![Mask RCNN on 4K Video](assets/4k_video.gif)](https://www.youtube.com/watch?v=OOT3UIXZztE) + +### [Images to OSM](https://github.com/jremillard/images-to-osm): Improve OpenStreetMap by adding baseball, soccer, tennis, football, and basketball fields. + +![Identify sport fields in satellite images](assets/images_to_osm.png) + +### [Splash of Color](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46). A blog post explaining how to train this model from scratch and use it to implement a color splash effect. +![Balloon Color Splash](assets/balloon_color_splash.gif) + + +### [Segmenting Nuclei in Microscopy Images](samples/nucleus). Built for the [2018 Data Science Bowl](https://www.kaggle.com/c/data-science-bowl-2018) +Code is in the `samples/nucleus` directory. + +![Nucleus Segmentation](assets/nucleus_segmentation.png) + +### [Detection and Segmentation for Surgery Robots](https://github.com/SUYEgit/Surgery-Robot-Detection-Segmentation) by the NUS Control & Mechatronics Lab. +![Surgery Robot Detection and Segmentation](https://github.com/SUYEgit/Surgery-Robot-Detection-Segmentation/raw/master/assets/video.gif) + +### [Reconstructing 3D buildings from aerial LiDAR](https://medium.com/geoai/reconstructing-3d-buildings-from-aerial-lidar-with-ai-details-6a81cb3079c0) +A proof of concept project by [Esri](https://www.esri.com/), in collaboration with Nvidia and Miami-Dade County. Along with a great write up and code by Dmitry Kudinov, Daniel Hedges, and Omar Maher. +![3D Building Reconstruction](assets/project_3dbuildings.png) + +### [Usiigaci: Label-free Cell Tracking in Phase Contrast Microscopy](https://github.com/oist/usiigaci) +A project from Japan to automatically track cells in a microfluidics platform. 
Paper is pending, but the source code is released. + +![](assets/project_usiigaci1.gif) ![](assets/project_usiigaci2.gif) + +### [Characterization of Arctic Ice-Wedge Polygons in Very High Spatial Resolution Aerial Imagery](http://www.mdpi.com/2072-4292/10/9/1487) +Research project to understand the complex processes between degradations in the Arctic and climate change. By Weixing Zhang, Chandi Witharana, Anna Liljedahl, and Mikhail Kanevskiy. +![image](assets/project_ice_wedge_polygons.png) + +### [Mask-RCNN Shiny](https://github.com/huuuuusy/Mask-RCNN-Shiny) +A computer vision class project by HU Shiyu to apply the color pop effect on people with beautiful results. +![](assets/project_shiny1.jpg) + +### [Mapping Challenge](https://github.com/crowdAI/crowdai-mapping-challenge-mask-rcnn): Convert satellite imagery to maps for use by humanitarian organisations. +![Mapping Challenge](assets/mapping_challenge.png) + +### [GRASS GIS Addon](https://github.com/ctu-geoforall-lab/i.ann.maskrcnn) to generate vector masks from geospatial imagery. Based on a [Master's thesis](https://github.com/ctu-geoforall-lab-projects/dp-pesek-2018) by Ondřej Pešek. +![GRASS GIS Image](assets/project_grass_gis.png) \ No newline at end of file diff --git a/src/tracker/__pycache__/signboard_track.cpython-310.pyc b/src/tracker/__pycache__/signboard_track.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d6888aedcdaac44da9ab7a50f48ebfd7a9b7f2c Binary files /dev/null and b/src/tracker/__pycache__/signboard_track.cpython-310.pyc differ diff --git a/src/tracker/__pycache__/signboard_track.cpython-38.pyc b/src/tracker/__pycache__/signboard_track.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16aefe5d2ef1d8d134b2c16645a0c50167bb5b05 Binary files /dev/null and b/src/tracker/__pycache__/signboard_track.cpython-38.pyc differ diff --git a/src/tracker/__pycache__/signboard_track.cpython-39.pyc b/src/tracker/__pycache__/signboard_track.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4efbf8e04d656b0c3118e5b2fcde31a6737da6bc Binary files /dev/null and b/src/tracker/__pycache__/signboard_track.cpython-39.pyc differ diff --git a/src/tracker/_tools_/__pycache__/generate_detections.cpython-38.pyc b/src/tracker/_tools_/__pycache__/generate_detections.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8347618ddf9777e178a627a5db03ef4bfe3e4227 Binary files /dev/null and b/src/tracker/_tools_/__pycache__/generate_detections.cpython-38.pyc differ diff --git a/src/tracker/_tools_/freeze_model.py b/src/tracker/_tools_/freeze_model.py new file mode 100644 index 0000000000000000000000000000000000000000..6ed8d9bfd062163d716c8a47a47a0058ee0862ee --- /dev/null +++ b/src/tracker/_tools_/freeze_model.py @@ -0,0 +1,218 @@ +import argparse +import tensorflow as tf +import tensorflow.contrib.slim as slim + + +def _batch_norm_fn(x, scope=None): + if scope is None: + scope = tf.get_variable_scope().name + "/bn" + return slim.batch_norm(x, scope=scope) + + +def create_link( + incoming, network_builder, scope, nonlinearity=tf.nn.elu, + weights_initializer=tf.truncated_normal_initializer(stddev=1e-3), + regularizer=None, is_first=False, summarize_activations=True): + if is_first: + network = incoming + else: + network = _batch_norm_fn(incoming, scope=scope + "/bn") + network = nonlinearity(network) + if summarize_activations: + tf.summary.histogram(scope+"/activations", network) + + pre_block_network = network + 
post_block_network = network_builder(pre_block_network, scope) + + incoming_dim = pre_block_network.get_shape().as_list()[-1] + outgoing_dim = post_block_network.get_shape().as_list()[-1] + if incoming_dim != outgoing_dim: + assert outgoing_dim == 2 * incoming_dim, \ + "%d != %d" % (outgoing_dim, 2 * incoming) + projection = slim.conv2d( + incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None, + scope=scope+"/projection", weights_initializer=weights_initializer, + biases_initializer=None, weights_regularizer=regularizer) + network = projection + post_block_network + else: + network = incoming + post_block_network + return network + + +def create_inner_block( + incoming, scope, nonlinearity=tf.nn.elu, + weights_initializer=tf.truncated_normal_initializer(1e-3), + bias_initializer=tf.zeros_initializer(), regularizer=None, + increase_dim=False, summarize_activations=True): + n = incoming.get_shape().as_list()[-1] + stride = 1 + if increase_dim: + n *= 2 + stride = 2 + + incoming = slim.conv2d( + incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME", + normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer, + biases_initializer=bias_initializer, weights_regularizer=regularizer, + scope=scope + "/1") + if summarize_activations: + tf.summary.histogram(incoming.name + "/activations", incoming) + + incoming = slim.dropout(incoming, keep_prob=0.6) + + incoming = slim.conv2d( + incoming, n, [3, 3], 1, activation_fn=None, padding="SAME", + normalizer_fn=None, weights_initializer=weights_initializer, + biases_initializer=bias_initializer, weights_regularizer=regularizer, + scope=scope + "/2") + return incoming + + +def residual_block(incoming, scope, nonlinearity=tf.nn.elu, + weights_initializer=tf.truncated_normal_initializer(1e3), + bias_initializer=tf.zeros_initializer(), regularizer=None, + increase_dim=False, is_first=False, + summarize_activations=True): + + def network_builder(x, s): + return create_inner_block( + x, s, nonlinearity, weights_initializer, bias_initializer, + regularizer, increase_dim, summarize_activations) + + return create_link( + incoming, network_builder, scope, nonlinearity, weights_initializer, + regularizer, is_first, summarize_activations) + + +def _create_network(incoming, reuse=None, weight_decay=1e-8): + nonlinearity = tf.nn.elu + conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3) + conv_bias_init = tf.zeros_initializer() + conv_regularizer = slim.l2_regularizer(weight_decay) + fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3) + fc_bias_init = tf.zeros_initializer() + fc_regularizer = slim.l2_regularizer(weight_decay) + + def batch_norm_fn(x): + return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn") + + network = incoming + network = slim.conv2d( + network, 32, [3, 3], stride=1, activation_fn=nonlinearity, + padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1", + weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, + weights_regularizer=conv_regularizer) + network = slim.conv2d( + network, 32, [3, 3], stride=1, activation_fn=nonlinearity, + padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2", + weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, + weights_regularizer=conv_regularizer) + + # NOTE(nwojke): This is missing a padding="SAME" to match the CNN + # architecture in Table 1 of the paper. 
Information on how this affects + # performance on MOT 16 training sequences can be found in + # issue 10 https://github.com/nwojke/deep_sort/issues/10 + network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1") + + network = residual_block( + network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init, + conv_regularizer, increase_dim=False, is_first=True) + network = residual_block( + network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init, + conv_regularizer, increase_dim=False) + + network = residual_block( + network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init, + conv_regularizer, increase_dim=True) + network = residual_block( + network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init, + conv_regularizer, increase_dim=False) + + network = residual_block( + network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init, + conv_regularizer, increase_dim=True) + network = residual_block( + network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init, + conv_regularizer, increase_dim=False) + + feature_dim = network.get_shape().as_list()[-1] + network = slim.flatten(network) + + network = slim.dropout(network, keep_prob=0.6) + network = slim.fully_connected( + network, feature_dim, activation_fn=nonlinearity, + normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer, + scope="fc1", weights_initializer=fc_weight_init, + biases_initializer=fc_bias_init) + + features = network + + # Features in rows, normalize axis 1. + features = slim.batch_norm(features, scope="ball", reuse=reuse) + feature_norm = tf.sqrt( + tf.constant(1e-8, tf.float32) + + tf.reduce_sum(tf.square(features), [1], keepdims=True)) + features = features / feature_norm + return features, None + + +def _network_factory(weight_decay=1e-8): + + def factory_fn(image, reuse): + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=False): + with slim.arg_scope([slim.conv2d, slim.fully_connected, + slim.batch_norm, slim.layer_norm], + reuse=reuse): + features, logits = _create_network( + image, reuse=reuse, weight_decay=weight_decay) + return features, logits + + return factory_fn + + +def _preprocess(image): + image = image[:, :, ::-1] # BGR to RGB + return image + + +def parse_args(): + """Parse command line arguments. 
+ """ + parser = argparse.ArgumentParser(description="Freeze old model") + parser.add_argument( + "--checkpoint_in", + default="resources/networks/mars-small128.ckpt-68577", + help="Path to checkpoint file") + parser.add_argument( + "--graphdef_out", + default="resources/networks/mars-small128.pb") + return parser.parse_args() + + +def main(): + args = parse_args() + + with tf.Session(graph=tf.Graph()) as session: + input_var = tf.placeholder( + tf.uint8, (None, 128, 64, 3), name="images") + image_var = tf.map_fn( + lambda x: _preprocess(x), tf.cast(input_var, tf.float32), + back_prop=False) + + factory_fn = _network_factory() + features, _ = factory_fn(image_var, reuse=None) + features = tf.identity(features, name="features") + + saver = tf.train.Saver(slim.get_variables_to_restore()) + saver.restore(session, args.checkpoint_in) + + output_graph_def = tf.graph_util.convert_variables_to_constants( + session, tf.get_default_graph().as_graph_def(), + [features.name.split(":")[0]]) + with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle: + file_handle.write(output_graph_def.SerializeToString()) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/tracker/_tools_/generate_detections.py b/src/tracker/_tools_/generate_detections.py new file mode 100644 index 0000000000000000000000000000000000000000..5819dbed8ca252d027f54e1009bf33920f9c5b55 --- /dev/null +++ b/src/tracker/_tools_/generate_detections.py @@ -0,0 +1,212 @@ +import os +import errno +import argparse +import numpy as np +import cv2 +import tensorflow.compat.v1 as tf + +#tf.compat.v1.disable_eager_execution() + +physical_devices = tf.config.experimental.list_physical_devices('GPU') +if len(physical_devices) > 0: + tf.config.experimental.set_memory_growth(physical_devices[0], True) + +def _run_in_batches(f, data_dict, out, batch_size): + data_len = len(out) + num_batches = int(data_len / batch_size) + + s, e = 0, 0 + for i in range(num_batches): + s, e = i * batch_size, (i + 1) * batch_size + batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} + out[s:e] = f(batch_data_dict) + if e < len(out): + batch_data_dict = {k: v[e:] for k, v in data_dict.items()} + out[e:] = f(batch_data_dict) + + +def extract_image_patch(image, bbox, patch_shape): + """Extract image patch from bounding box. + Parameters + ---------- + image : ndarray + The full image. + bbox : array_like + The bounding box in format (x, y, width, height). + patch_shape : Optional[array_like] + This parameter can be used to enforce a desired patch shape + (height, width). First, the `bbox` is adapted to the aspect ratio + of the patch shape, then it is clipped at the image boundaries. + If None, the shape is computed from :arg:`bbox`. + Returns + ------- + ndarray | NoneType + An image patch showing the :arg:`bbox`, optionally reshaped to + :arg:`patch_shape`. + Returns None if the bounding box is empty or fully outside of the image + boundaries. 
+ """ + bbox = np.array(bbox) + if patch_shape is not None: + # correct aspect ratio to patch shape + target_aspect = float(patch_shape[1]) / patch_shape[0] + new_width = target_aspect * bbox[3] + bbox[0] -= (new_width - bbox[2]) / 2 + bbox[2] = new_width + + # convert to top left, bottom right + bbox[2:] += bbox[:2] + bbox = bbox.astype(np.int) + + # clip at image boundaries + bbox[:2] = np.maximum(0, bbox[:2]) + bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) + if np.any(bbox[:2] >= bbox[2:]): + return None + sx, sy, ex, ey = bbox + image = image[sy:ey, sx:ex] + image = cv2.resize(image, tuple(patch_shape[::-1])) + return image + + +class ImageEncoder(object): + + def __init__(self, checkpoint_filename, input_name="images", + output_name="features"): + self.session = tf.Session() + with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: + graph_def = tf.GraphDef() + graph_def.ParseFromString(file_handle.read()) + tf.import_graph_def(graph_def, name="net") + self.input_var = tf.get_default_graph().get_tensor_by_name( + "%s:0" % input_name) + self.output_var = tf.get_default_graph().get_tensor_by_name( + "%s:0" % output_name) + + assert len(self.output_var.get_shape()) == 2 + assert len(self.input_var.get_shape()) == 4 + self.feature_dim = self.output_var.get_shape().as_list()[-1] + self.image_shape = self.input_var.get_shape().as_list()[1:] + + def __call__(self, data_x, batch_size=32): + out = np.zeros((len(data_x), self.feature_dim), np.float32) + _run_in_batches( + lambda x: self.session.run(self.output_var, feed_dict=x), + {self.input_var: data_x}, out, batch_size) + return out + + +def create_box_encoder(model_filename, input_name="images", + output_name="features", batch_size=32): + image_encoder = ImageEncoder(model_filename, input_name, output_name) + image_shape = image_encoder.image_shape + + def encoder(image, boxes): + image_patches = [] + for box in boxes: + patch = extract_image_patch(image, box, image_shape[:2]) + if patch is None: + print("WARNING: Failed to extract image patch: %s." % str(box)) + patch = np.random.uniform( + 0., 255., image_shape).astype(np.uint8) + image_patches.append(patch) + image_patches = np.asarray(image_patches) + return image_encoder(image_patches, batch_size) + + return encoder + + +def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): + """Generate detections with features. + Parameters + ---------- + encoder : Callable[image, ndarray] -> ndarray + The encoder function takes as input a BGR color image and a matrix of + bounding boxes in format `(x, y, w, h)` and returns a matrix of + corresponding feature vectors. + mot_dir : str + Path to the MOTChallenge directory (can be either train or test). + output_dir + Path to the output directory. Will be created if it does not exist. + detection_dir + Path to custom detections. The directory structure should be the default + MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the + standard MOTChallenge detections. 
+ """ + if detection_dir is None: + detection_dir = mot_dir + try: + os.makedirs(output_dir) + except OSError as exception: + if exception.errno == errno.EEXIST and os.path.isdir(output_dir): + pass + else: + raise ValueError( + "Failed to created output directory '%s'" % output_dir) + + for sequence in os.listdir(mot_dir): + print("Processing %s" % sequence) + sequence_dir = os.path.join(mot_dir, sequence) + + image_dir = os.path.join(sequence_dir, "img1") + image_filenames = { + int(os.path.splitext(f)[0]): os.path.join(image_dir, f) + for f in os.listdir(image_dir)} + + detection_file = os.path.join( + detection_dir, sequence, "det/det.txt") + detections_in = np.loadtxt(detection_file, delimiter=',') + detections_out = [] + + frame_indices = detections_in[:, 0].astype(np.int) + min_frame_idx = frame_indices.astype(np.int).min() + max_frame_idx = frame_indices.astype(np.int).max() + for frame_idx in range(min_frame_idx, max_frame_idx + 1): + print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) + mask = frame_indices == frame_idx + rows = detections_in[mask] + + if frame_idx not in image_filenames: + print("WARNING could not find image for frame %d" % frame_idx) + continue + bgr_image = cv2.imread( + image_filenames[frame_idx], cv2.IMREAD_COLOR) + features = encoder(bgr_image, rows[:, 2:6].copy()) + detections_out += [np.r_[(row, feature)] for row, feature + in zip(rows, features)] + + output_filename = os.path.join(output_dir, "%s.npy" % sequence) + np.save( + output_filename, np.asarray(detections_out), allow_pickle=False) + + +def parse_args(): + """Parse command line arguments. + """ + parser = argparse.ArgumentParser(description="Re-ID feature extractor") + parser.add_argument( + "--model", + default="resources/networks/mars-small128.pb", + help="Path to freezed inference graph protobuf.") + parser.add_argument( + "--mot_dir", help="Path to MOTChallenge directory (train or test)", + required=True) + parser.add_argument( + "--detection_dir", help="Path to custom detections. Defaults to " + "standard MOT detections Directory structure should be the default " + "MOTChallenge structure: [sequence]/det/det.txt", default=None) + parser.add_argument( + "--output_dir", help="Output directory. 
Will be created if it does not" + " exist.", default="detections") + return parser.parse_args() + + +def main(): + args = parse_args() + encoder = create_box_encoder(args.model, batch_size=32) + generate_detections(encoder, args.mot_dir, args.output_dir, + args.detection_dir) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/tracker/application_util/__init__.py b/src/tracker/application_util/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/tracker/application_util/__pycache__/__init__.cpython-38.pyc b/src/tracker/application_util/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60f5df4d4a65ab836d786d11597e347e34511b40 Binary files /dev/null and b/src/tracker/application_util/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/tracker/application_util/__pycache__/preprocessing.cpython-38.pyc b/src/tracker/application_util/__pycache__/preprocessing.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d107d6fe36ffd93e1e9ea2301a1400b0adea8092 Binary files /dev/null and b/src/tracker/application_util/__pycache__/preprocessing.cpython-38.pyc differ diff --git a/src/tracker/application_util/image_viewer.py b/src/tracker/application_util/image_viewer.py new file mode 100644 index 0000000000000000000000000000000000000000..4bea047557773d5d66b8b25d79afa5e929ba4476 --- /dev/null +++ b/src/tracker/application_util/image_viewer.py @@ -0,0 +1,312 @@ +""" +This module contains an image viewer and drawing routines based on OpenCV. +""" +import numpy as np +import cv2 +import time + + +def is_in_bounds(mat, roi): + """Check if ROI is fully contained in the image. + Parameters + ---------- + mat : ndarray + An ndarray of ndim>=2. + roi : (int, int, int, int) + Region of interest (x, y, width, height) where (x, y) is the top-left + corner. + Returns + ------- + bool + Returns true if the ROI is contain in mat. + """ + if roi[0] < 0 or roi[0] + roi[2] >= mat.shape[1]: + return False + if roi[1] < 0 or roi[1] + roi[3] >= mat.shape[0]: + return False + return True + + +def view_roi(mat, roi): + """Get sub-array. + The ROI must be valid, i.e., fully contained in the image. + Parameters + ---------- + mat : ndarray + An ndarray of ndim=2 or ndim=3. + roi : (int, int, int, int) + Region of interest (x, y, width, height) where (x, y) is the top-left + corner. + Returns + ------- + ndarray + A view of the roi. + """ + sx, ex = roi[0], roi[0] + roi[2] + sy, ey = roi[1], roi[1] + roi[3] + if mat.ndim == 2: + return mat[sy:ey, sx:ex] + else: + return mat[sy:ey, sx:ex, :] + + +class ImageViewer(object): + """An image viewer with drawing routines and video capture capabilities. + Key Bindings: + * 'SPACE' : pause + * 'ESC' : quit + Parameters + ---------- + update_ms : int + Number of milliseconds between frames (1000 / frames per second). + window_shape : (int, int) + Shape of the window (width, height). + caption : Optional[str] + Title of the window. + Attributes + ---------- + image : ndarray + Color image of shape (height, width, 3). You may directly manipulate + this image to change the view. Otherwise, you may call any of the + drawing routines of this class. Internally, the image is treated as + beeing in BGR color space. + Note that the image is resized to the the image viewers window_shape + just prior to visualization. 
Therefore, you may pass differently sized + images and call drawing routines with the appropriate, original point + coordinates. + color : (int, int, int) + Current BGR color code that applies to all drawing routines. + Values are in range [0-255]. + text_color : (int, int, int) + Current BGR text color code that applies to all text rendering + routines. Values are in range [0-255]. + thickness : int + Stroke width in pixels that applies to all drawing routines. + """ + + def __init__(self, update_ms, window_shape=(640, 480), caption="Figure 1"): + self._window_shape = window_shape + self._caption = caption + self._update_ms = update_ms + self._video_writer = None + self._user_fun = lambda: None + self._terminate = False + + self.image = np.zeros(self._window_shape + (3, ), dtype=np.uint8) + self._color = (0, 0, 0) + self.text_color = (255, 255, 255) + self.thickness = 1 + + @property + def color(self): + return self._color + + @color.setter + def color(self, value): + if len(value) != 3: + raise ValueError("color must be tuple of 3") + self._color = tuple(int(c) for c in value) + + def rectangle(self, x, y, w, h, label=None): + """Draw a rectangle. + Parameters + ---------- + x : float | int + Top left corner of the rectangle (x-axis). + y : float | int + Top let corner of the rectangle (y-axis). + w : float | int + Width of the rectangle. + h : float | int + Height of the rectangle. + label : Optional[str] + A text label that is placed at the top left corner of the + rectangle. + """ + pt1 = int(x), int(y) + pt2 = int(x + w), int(y + h) + cv2.rectangle(self.image, pt1, pt2, self._color, self.thickness) + if label is not None: + text_size = cv2.getTextSize( + label, cv2.FONT_HERSHEY_PLAIN, 1, self.thickness) + + center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1] + pt2 = pt1[0] + 10 + text_size[0][0], pt1[1] + 10 + \ + text_size[0][1] + cv2.rectangle(self.image, pt1, pt2, self._color, -1) + cv2.putText(self.image, label, center, cv2.FONT_HERSHEY_PLAIN, + 1, (255, 255, 255), self.thickness) + + def circle(self, x, y, radius, label=None): + """Draw a circle. + Parameters + ---------- + x : float | int + Center of the circle (x-axis). + y : float | int + Center of the circle (y-axis). + radius : float | int + Radius of the circle in pixels. + label : Optional[str] + A text label that is placed at the center of the circle. + """ + image_size = int(radius + self.thickness + 1.5) # actually half size + roi = int(x - image_size), int(y - image_size), \ + int(2 * image_size), int(2 * image_size) + if not is_in_bounds(self.image, roi): + return + + image = view_roi(self.image, roi) + center = image.shape[1] // 2, image.shape[0] // 2 + cv2.circle( + image, center, int(radius + .5), self._color, self.thickness) + if label is not None: + cv2.putText( + self.image, label, center, cv2.FONT_HERSHEY_PLAIN, + 2, self.text_color, 2) + + def gaussian(self, mean, covariance, label=None): + """Draw 95% confidence ellipse of a 2-D Gaussian distribution. + Parameters + ---------- + mean : array_like + The mean vector of the Gaussian distribution (ndim=1). + covariance : array_like + The 2x2 covariance matrix of the Gaussian distribution. + label : Optional[str] + A text label that is placed at the center of the ellipse. + """ + # chi2inv(0.95, 2) = 5.9915 + vals, vecs = np.linalg.eigh(5.9915 * covariance) + indices = vals.argsort()[::-1] + vals, vecs = np.sqrt(vals[indices]), vecs[:, indices] + + center = int(mean[0] + .5), int(mean[1] + .5) + axes = int(vals[0] + .5), int(vals[1] + .5) + angle = int(180. 
* np.arctan2(vecs[1, 0], vecs[0, 0]) / np.pi) + cv2.ellipse( + self.image, center, axes, angle, 0, 360, self._color, 2) + if label is not None: + cv2.putText(self.image, label, center, cv2.FONT_HERSHEY_PLAIN, + 2, self.text_color, 2) + + def annotate(self, x, y, text): + """Draws a text string at a given location. + Parameters + ---------- + x : int | float + Bottom-left corner of the text in the image (x-axis). + y : int | float + Bottom-left corner of the text in the image (y-axis). + text : str + The text to be drawn. + """ + cv2.putText(self.image, text, (int(x), int(y)), cv2.FONT_HERSHEY_PLAIN, + 2, self.text_color, 2) + + def colored_points(self, points, colors=None, skip_index_check=False): + """Draw a collection of points. + The point size is fixed to 1. + Parameters + ---------- + points : ndarray + The Nx2 array of image locations, where the first dimension is + the x-coordinate and the second dimension is the y-coordinate. + colors : Optional[ndarray] + The Nx3 array of colors (dtype=np.uint8). If None, the current + color attribute is used. + skip_index_check : Optional[bool] + If True, index range checks are skipped. This is faster, but + requires all points to lie within the image dimensions. + """ + if not skip_index_check: + cond1, cond2 = points[:, 0] >= 0, points[:, 0] < 480 + cond3, cond4 = points[:, 1] >= 0, points[:, 1] < 640 + indices = np.logical_and.reduce((cond1, cond2, cond3, cond4)) + points = points[indices, :] + if colors is None: + colors = np.repeat( + self._color, len(points)).reshape(3, len(points)).T + indices = (points + .5).astype(np.int) + self.image[indices[:, 1], indices[:, 0], :] = colors + + def enable_videowriter(self, output_filename, fourcc_string="MJPG", + fps=None): + """ Write images to video file. + Parameters + ---------- + output_filename : str + Output filename. + fourcc_string : str + The OpenCV FOURCC code that defines the video codec (check OpenCV + documentation for more information). + fps : Optional[float] + Frames per second. If None, configured according to current + parameters. + """ + fourcc = cv2.VideoWriter_fourcc(*fourcc_string) + if fps is None: + fps = int(1000. / self._update_ms) + self._video_writer = cv2.VideoWriter( + output_filename, fourcc, fps, self._window_shape) + + def disable_videowriter(self): + """ Disable writing videos. + """ + self._video_writer = None + + def run(self, update_fun=None): + """Start the image viewer. + This method blocks until the user requests to close the window. + Parameters + ---------- + update_fun : Optional[Callable[] -> None] + An optional callable that is invoked at each frame. May be used + to play an animation/a video sequence. 
+ """ + if update_fun is not None: + self._user_fun = update_fun + + self._terminate, is_paused = False, False + # print("ImageViewer is paused, press space to start.") + while not self._terminate: + t0 = time.time() + if not is_paused: + self._terminate = not self._user_fun() + if self._video_writer is not None: + self._video_writer.write( + cv2.resize(self.image, self._window_shape)) + t1 = time.time() + remaining_time = max(1, int(self._update_ms - 1e3*(t1-t0))) + cv2.imshow( + self._caption, cv2.resize(self.image, self._window_shape[:2])) + key = cv2.waitKey(remaining_time) + if key & 255 == 27: # ESC + print("terminating") + self._terminate = True + elif key & 255 == 32: # ' ' + print("toggeling pause: " + str(not is_paused)) + is_paused = not is_paused + elif key & 255 == 115: # 's' + print("stepping") + self._terminate = not self._user_fun() + is_paused = True + + # Due to a bug in OpenCV we must call imshow after destroying the + # window. This will make the window appear again as soon as waitKey + # is called. + # + # see https://github.com/Itseez/opencv/issues/4535 + self.image[:] = 0 + cv2.destroyWindow(self._caption) + cv2.waitKey(1) + cv2.imshow(self._caption, self.image) + + def stop(self): + """Stop the control loop. + After calling this method, the viewer will stop execution before the + next frame and hand over control flow to the user. + Parameters + ---------- + """ + self._terminate = True \ No newline at end of file diff --git a/src/tracker/application_util/preprocessing.py b/src/tracker/application_util/preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..fa24a75bddf22a443a92b91c7579d59054592bca --- /dev/null +++ b/src/tracker/application_util/preprocessing.py @@ -0,0 +1,66 @@ +import numpy as np +import cv2 + + +def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None): + """Suppress overlapping detections. + Original code from [1]_ has been adapted to include confidence score. + .. [1] http://www.pyimagesearch.com/2015/02/16/ + faster-non-maximum-suppression-python/ + Examples + -------- + >>> boxes = [d.roi for d in detections] + >>> classes = [d.classes for d in detections] + >>> scores = [d.confidence for d in detections] + >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) + >>> detections = [detections[i] for i in indices] + Parameters + ---------- + boxes : ndarray + Array of ROIs (x, y, width, height). + max_bbox_overlap : float + ROIs that overlap more than this values are suppressed. + scores : Optional[array_like] + Detector confidence score. + Returns + ------- + List[int] + Returns indices of detections that have survived non-maxima suppression. 
+ """ + if len(boxes) == 0: + return [] + + boxes = boxes.astype(np.float) + pick = [] + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + boxes[:, 0] + y2 = boxes[:, 3] + boxes[:, 1] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + if scores is not None: + idxs = np.argsort(scores) + else: + idxs = np.argsort(y2) + + while len(idxs) > 0: + last = len(idxs) - 1 + i = idxs[last] + pick.append(i) + + xx1 = np.maximum(x1[i], x1[idxs[:last]]) + yy1 = np.maximum(y1[i], y1[idxs[:last]]) + xx2 = np.minimum(x2[i], x2[idxs[:last]]) + yy2 = np.minimum(y2[i], y2[idxs[:last]]) + + w = np.maximum(0, xx2 - xx1 + 1) + h = np.maximum(0, yy2 - yy1 + 1) + + overlap = (w * h) / area[idxs[:last]] + + idxs = np.delete( + idxs, np.concatenate( + ([last], np.where(overlap > max_bbox_overlap)[0]))) + + return pick \ No newline at end of file diff --git a/src/tracker/application_util/visualization.py b/src/tracker/application_util/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..4b092924fae24f3448c08babdc21b52284cab821 --- /dev/null +++ b/src/tracker/application_util/visualization.py @@ -0,0 +1,125 @@ +import numpy as np +import colorsys +from .image_viewer import ImageViewer + + +def create_unique_color_float(tag, hue_step=0.41): + """Create a unique RGB color code for a given track id (tag). + The color code is generated in HSV color space by moving along the + hue angle and gradually changing the saturation. + Parameters + ---------- + tag : int + The unique target identifying tag. + hue_step : float + Difference between two neighboring color codes in HSV space (more + specifically, the distance in hue channel). + Returns + ------- + (float, float, float) + RGB color code in range [0, 1] + """ + h, v = (tag * hue_step) % 1, 1. - (int(tag * hue_step) % 4) / 5. + r, g, b = colorsys.hsv_to_rgb(h, 1., v) + return r, g, b + + +def create_unique_color_uchar(tag, hue_step=0.41): + """Create a unique RGB color code for a given track id (tag). + The color code is generated in HSV color space by moving along the + hue angle and gradually changing the saturation. + Parameters + ---------- + tag : int + The unique target identifying tag. + hue_step : float + Difference between two neighboring color codes in HSV space (more + specifically, the distance in hue channel). + Returns + ------- + (int, int, int) + RGB color code in range [0, 255] + """ + r, g, b = create_unique_color_float(tag, hue_step) + return int(255*r), int(255*g), int(255*b) + + +class NoVisualization(object): + """ + A dummy visualization object that loops through all frames in a given + sequence to update the tracker without performing any visualization. + """ + + def __init__(self, seq_info): + self.frame_idx = seq_info["min_frame_idx"] + self.last_idx = seq_info["max_frame_idx"] + + def set_image(self, image): + pass + + def draw_groundtruth(self, track_ids, boxes): + pass + + def draw_detections(self, detections): + pass + + def draw_trackers(self, trackers): + pass + + def run(self, frame_callback): + while self.frame_idx <= self.last_idx: + frame_callback(self, self.frame_idx) + self.frame_idx += 1 + + +class Visualization(object): + """ + This class shows tracking output in an OpenCV image viewer. 
+ """ + + def __init__(self, seq_info, update_ms): + image_shape = seq_info["image_size"][::-1] + aspect_ratio = float(image_shape[1]) / image_shape[0] + image_shape = 1024, int(aspect_ratio * 1024) + self.viewer = ImageViewer( + update_ms, image_shape, "Figure %s" % seq_info["sequence_name"]) + self.viewer.thickness = 2 + self.frame_idx = seq_info["min_frame_idx"] + self.last_idx = seq_info["max_frame_idx"] + + def run(self, frame_callback): + self.viewer.run(lambda: self._update_fun(frame_callback)) + + def _update_fun(self, frame_callback): + if self.frame_idx > self.last_idx: + return False # Terminate + frame_callback(self, self.frame_idx) + self.frame_idx += 1 + return True + + def set_image(self, image): + self.viewer.image = image + + def draw_groundtruth(self, track_ids, boxes): + self.viewer.thickness = 2 + for track_id, box in zip(track_ids, boxes): + self.viewer.color = create_unique_color_uchar(track_id) + self.viewer.rectangle(*box.astype(np.int), label=str(track_id)) + + def draw_detections(self, detections): + self.viewer.thickness = 2 + self.viewer.color = 0, 0, 255 + for i, detection in enumerate(detections): + self.viewer.rectangle(*detection.tlwh) + + def draw_trackers(self, tracks): + self.viewer.thickness = 2 + for track in tracks: + if not track.is_confirmed() or track.time_since_update > 0: + continue + self.viewer.color = create_unique_color_uchar(track.track_id) + self.viewer.rectangle( + *track.to_tlwh().astype(np.int), label=str(track.track_id)) + # self.viewer.gaussian(track.mean[:2], track.covariance[:2, :2], + # label="%d" % track.track_id) +# \ No newline at end of file diff --git a/src/tracker/deep_sort/__init__.py b/src/tracker/deep_sort/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/tracker/deep_sort/__pycache__/__init__.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..57bb915b78f7cd9c4c75cd32e39f41349e515cc2 Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/__init__.cpython-38.pyc differ diff --git a/src/tracker/deep_sort/__pycache__/detection.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/detection.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..040765bbcb5bbd0659bf1489cd18abe59cc25f92 Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/detection.cpython-38.pyc differ diff --git a/src/tracker/deep_sort/__pycache__/iou_matching.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/iou_matching.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ec9f1cba1dd2549d7a161c2c6b3908a5033ba57 Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/iou_matching.cpython-38.pyc differ diff --git a/src/tracker/deep_sort/__pycache__/kalman_filter.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/kalman_filter.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d41f8b2f145b20be7f1f6686edb98e666edad55 Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/kalman_filter.cpython-38.pyc differ diff --git a/src/tracker/deep_sort/__pycache__/linear_assignment.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/linear_assignment.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e90802ff9c26c423246ce7113d23473189fb4cff Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/linear_assignment.cpython-38.pyc differ 
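
For reference, the `non_max_suppression` routine added in `application_util/preprocessing.py` above takes boxes in `(x, y, width, height)` format together with per-detection classes and confidence scores, and returns the indices of the boxes that survive. A small usage sketch with made-up values, assuming `src/tracker` is on the Python path and the older NumPy these modules target (they still use `np.float`/`np.int`):

```python
import numpy as np
from application_util.preprocessing import non_max_suppression

# Two heavily overlapping boxes plus one separate box, (x, y, w, h) format.
boxes = np.array([[10.0, 10.0, 50.0, 80.0],
                  [12.0, 12.0, 50.0, 80.0],
                  [200.0, 40.0, 60.0, 90.0]])
classes = np.array([0, 0, 0])          # accepted by the signature
scores = np.array([0.9, 0.6, 0.8])

keep = non_max_suppression(boxes, classes, max_bbox_overlap=0.7, scores=scores)
print(keep)        # -> [0, 2]: the lower-scoring duplicate is suppressed
picked = boxes[keep]
```
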
diff --git a/src/tracker/deep_sort/__pycache__/nn_matching.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/nn_matching.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..490f5e373311b71912bf10277c0b0bef006fb1cc Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/nn_matching.cpython-38.pyc differ diff --git a/src/tracker/deep_sort/__pycache__/track.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/track.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51a80cbffd62760f6d840ad261b27953d33c8f9f Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/track.cpython-38.pyc differ diff --git a/src/tracker/deep_sort/__pycache__/tracker.cpython-38.pyc b/src/tracker/deep_sort/__pycache__/tracker.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2794cd86826e8ef5deaf38f55ce32439f51cad8f Binary files /dev/null and b/src/tracker/deep_sort/__pycache__/tracker.cpython-38.pyc differ diff --git a/src/tracker/deep_sort/detection.py b/src/tracker/deep_sort/detection.py new file mode 100644 index 0000000000000000000000000000000000000000..f41d77b809506833f7075b464d0c67c2d8e302be --- /dev/null +++ b/src/tracker/deep_sort/detection.py @@ -0,0 +1,51 @@ +import numpy as np + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + Parameters + ---------- + tlwh : array_like + Bounding box in format `(x, y, w, h)`. + confidence : float + Detector confidence score. + feature : array_like + A feature vector that describes the object contained in this image. + Attributes + ---------- + tlwh : ndarray + Bounding box in format `(top left x, top left y, width, height)`. + confidence : ndarray + Detector confidence score. + class_name : ndarray + Detector class. + feature : ndarray | NoneType + A feature vector that describes the object contained in this image. + """ + + def __init__(self, tlwh, confidence, class_name, feature): + self.tlwh = np.asarray(tlwh, dtype=np.float) + self.confidence = float(confidence) + self.class_name = class_name + self.feature = np.asarray(feature, dtype=np.float32) + + def get_class(self): + return self.class_name + + def to_tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret \ No newline at end of file diff --git a/src/tracker/deep_sort/detection_.py b/src/tracker/deep_sort/detection_.py new file mode 100644 index 0000000000000000000000000000000000000000..103243b8a7976f742277cce49f9037ae54889f60 --- /dev/null +++ b/src/tracker/deep_sort/detection_.py @@ -0,0 +1,48 @@ +import numpy as np + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + Parameters + ---------- + tlwh : array_like + Bounding box in format `(x, y, w, h)`. + confidence : float + Detector confidence score. + feature : array_like + A feature vector that describes the object contained in this image. + Attributes + ---------- + tlwh : ndarray + Bounding box in format `(top left x, top left y, width, height)`. + confidence : ndarray + Detector confidence score. 
+ feature : ndarray | NoneType + A feature vector that describes the object contained in this image. + """ + + def __init__(self, tlwh, score, classe, mask, color, feature): + self.tlwh = np.asarray(tlwh, dtype=np.float) + self.score = float(score) + self.classe = classe + self.mask = mask + self.color = color + self.feature = np.asarray(feature, dtype=np.float32) + + def to_tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret \ No newline at end of file diff --git a/src/tracker/deep_sort/iou_matching.py b/src/tracker/deep_sort/iou_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..8c86aec5d35bf57c95cb894e357fd283b04993eb --- /dev/null +++ b/src/tracker/deep_sort/iou_matching.py @@ -0,0 +1,74 @@ +from __future__ import absolute_import +import numpy as np +from . import linear_assignment + + +def iou(bbox, candidates): + """Computer intersection over union. + Parameters + ---------- + bbox : ndarray + A bounding box in format `(top left x, top left y, width, height)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. + """ + bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + + +def iou_cost(tracks, detections, track_indices=None, + detection_indices=None): + """An intersection over union distance metric. + Parameters + ---------- + tracks : List[deep_sort.track.Track] + A list of tracks. + detections : List[deep_sort.detection.Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
+ """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou(bbox, candidates) + return cost_matrix \ No newline at end of file diff --git a/src/tracker/deep_sort/kalman_filter.py b/src/tracker/deep_sort/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..6e19f56f8e5beb563f018ab03ba7d47f438734d3 --- /dev/null +++ b/src/tracker/deep_sort/kalman_filter.py @@ -0,0 +1,207 @@ +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + The 8-dimensional state space + x, y, a, h, vx, vy, va, vh + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. 
+ Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + mean = np.dot(self._motion_mat, mean) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
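# Illustrative aside: the squared Mahalanobis distance that gating_distance computes,
# reproduced on a toy 2-D Gaussian and compared against the chi-square table above
# (chi2inv95[2] = 5.9915 is the gate when only_position=True).
import numpy as np
import scipy.linalg

projected_cov = np.diag([4., 1.])                  # toy projected covariance
d = np.array([[3., 1.]])                           # measurement minus projected mean
z = scipy.linalg.solve_triangular(np.linalg.cholesky(projected_cov), d.T, lower=True)
squared_maha = np.sum(z * z, axis=0)               # 3^2/4 + 1^2/1 = 3.25 < 5.9915 -> inside the gate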
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + cholesky_factor = np.linalg.cholesky(covariance) + d = measurements - mean + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha \ No newline at end of file diff --git a/src/tracker/deep_sort/linear_assignment.py b/src/tracker/deep_sort/linear_assignment.py new file mode 100644 index 0000000000000000000000000000000000000000..622863baa70523d98606023aefde8f2f6450aa19 --- /dev/null +++ b/src/tracker/deep_sort/linear_assignment.py @@ -0,0 +1,181 @@ +from __future__ import absolute_import +import numpy as np +from scipy.optimize import linear_sum_assignment +from . import kalman_filter + + +INFTY_COST = 1e+5 + + +def min_cost_matching( + distance_metric, max_distance, tracks, detections, track_indices=None, + detection_indices=None): + """Solve linear assignment problem. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. 
+ + cost_matrix = distance_metric( + tracks, detections, track_indices, detection_indices) + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + indices = linear_sum_assignment(cost_matrix) + indices = np.asarray(indices) + indices = np.transpose(indices) + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in indices[:, 1]: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in indices[:, 0]: + unmatched_tracks.append(track_idx) + for row, col in indices: + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade( + distance_metric, max_distance, cascade_depth, tracks, detections, + track_indices=None, detection_indices=None): + """Run matching cascade. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + cascade_depth: int + The cascade depth, should be se to the maximum track age. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : Optional[List[int]] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). Defaults to all tracks. + detection_indices : Optional[List[int]] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). Defaults to all + detections. + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
+ """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [ + k for k in track_indices + if tracks[k].time_since_update == 1 + level + ] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix( + kf, cost_matrix, tracks, detections, track_indices, detection_indices, + gated_cost=INFTY_COST, only_position=False): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + Returns + ------- + ndarray + Returns the modified cost matrix. + """ + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray( + [detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix \ No newline at end of file diff --git a/src/tracker/deep_sort/nn_matching.py b/src/tracker/deep_sort/nn_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..ad29d9bf04e37b35a9b3fd7ab3fdbc0010c101ed --- /dev/null +++ b/src/tracker/deep_sort/nn_matching.py @@ -0,0 +1,156 @@ +import numpy as np + + +def _pdist(a, b): + """Compute pair-wise squared distance between points in `a` and `b`. + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. 
+ """ + a, b = np.asarray(a), np.asarray(b) + if len(a) == 0 or len(b) == 0: + return np.zeros((len(a), len(b))) + a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) + r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] + r2 = np.clip(r2, 0., float(np.inf)) + return r2 + + +def _cosine_distance(a, b, data_is_normalized=False): + """Compute pair-wise cosine distance between points in `a` and `b`. + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + data_is_normalized : Optional[bool] + If True, assumes rows in a and b are unit length vectors. + Otherwise, a and b are explicitly normalized to lenght 1. + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + """ + if not data_is_normalized: + a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) + b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) + return 1. - np.dot(a, b.T) + + +def _nn_euclidean_distance(x, y): + """ Helper function for nearest neighbor distance metric (Euclidean). + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest Euclidean distance to a sample in `x`. + """ + distances = _pdist(x, y) + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(x, y): + """ Helper function for nearest neighbor distance metric (cosine). + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest cosine distance to a sample in `x`. + """ + distances = _cosine_distance(x, y) + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + Parameters + ---------- + metric : str + Either "euclidean" or "cosine". + matching_threshold: float + The matching threshold. Samples with larger distance are considered an + invalid match. + budget : Optional[int] + If not None, fix samples per class to at most this number. Removes + the oldest samples when the budget is reached. + Attributes + ---------- + samples : Dict[int -> List[ndarray]] + A dictionary that maps from target identities to the list of samples + that have been observed so far. + """ + + def __init__(self, metric, matching_threshold, budget=None): + + + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """Update the distance metric with new data. + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : ndarray + An integer array of associated target identities. + active_targets : List[int] + A list of targets that are currently present in the scene. 
+ """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """Compute distance between features and targets. + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : List[int] + A list of targets to match the given `features` against. + Returns + ------- + ndarray + Returns a cost matrix of shape len(targets), len(features), where + element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix \ No newline at end of file diff --git a/src/tracker/deep_sort/track.py b/src/tracker/deep_sort/track.py new file mode 100644 index 0000000000000000000000000000000000000000..ec4c7021cd997aa873210e3b93441c8488bfdc2b --- /dev/null +++ b/src/tracker/deep_sort/track.py @@ -0,0 +1,155 @@ +class TrackState: + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + """ + + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track: + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + Parameters + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age : int + The maximum number of consecutive misses before the track state is + set to `Deleted`. + feature : Optional[ndarray] + Feature vector of the detection this track originates from. If not None, + this feature is added to the `features` cache. + Attributes + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + hits : int + Total number of measurement updates. + age : int + Total number of frames since first occurance. + time_since_update : int + Total number of frames since last measurement update. + state : TrackState + The current track state. + features : List[ndarray] + A cache of features. On each measurement update, the associated feature + vector is added to this list. 
+ """ + + def __init__(self, mean, covariance, track_id, n_init, max_age, + feature=None, class_name=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + + self.state = TrackState.Tentative + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + self.class_name = class_name + + def to_tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + Returns + ------- + ndarray + The bounding box. + """ + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get current position in bounding box format `(min x, miny, max x, + max y)`. + Returns + ------- + ndarray + The bounding box. + """ + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def get_class(self): + return self.class_name + + def predict(self, kf): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + """ + self.mean, self.covariance = kf.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kf, detection): + """Perform Kalman filter measurement update step and update the feature + cache. + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + detection : Detection + The associated detection. + """ + self.mean, self.covariance = kf.update( + self.mean, self.covariance, detection.to_xyah()) + self.features.append(detection.feature) + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). + """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed). + """ + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted \ No newline at end of file diff --git a/src/tracker/deep_sort/tracker.py b/src/tracker/deep_sort/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..b7602477fabb99a6432cd193729e2b20b24b061b --- /dev/null +++ b/src/tracker/deep_sort/tracker.py @@ -0,0 +1,132 @@ +from __future__ import absolute_import +import numpy as np +from . import kalman_filter +from . import linear_assignment +from . import iou_matching +from .track import Track + + +class Tracker: + """ + This is the multi-target tracker. + Parameters + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + A distance metric for measurement-to-track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. 
+ Attributes + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + The distance metric used for measurement to track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of frames that a track remains in initialization phase. + kf : kalman_filter.KalmanFilter + A Kalman filter to filter target trajectories in image space. + tracks : List[Track] + The list of active tracks at the current time step. + """ + + def __init__(self, metric, max_iou_distance=0.7, max_age=60, n_init=3): + self.metric = metric + self.max_iou_distance = max_iou_distance + self.max_age = max_age + self.n_init = n_init + + self.kf = kalman_filter.KalmanFilter() + self.tracks = [] + self._next_id = 1 + + def predict(self): + """Propagate track state distributions one time step forward. + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.kf) + + def update(self, detections): + """Perform measurement update and track management. + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + """ + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = \ + self._match(detections) + + # Update track set. + for track_idx, detection_idx in matches: + self.tracks[track_idx].update( + self.kf, detections[detection_idx]) + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + self.tracks = [t for t in self.tracks if not t.is_deleted()] + + # Update distance metric. + active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] + features, targets = [], [] + for track in self.tracks: + if not track.is_confirmed(): + continue + features += track.features + targets += [track.track_id for _ in track.features] + track.features = [] + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets) + + def _match(self, detections): + + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feature for i in detection_indices]) + targets = np.array([tracks[i].track_id for i in track_indices]) + cost_matrix = self.metric.distance(features, targets) + cost_matrix = linear_assignment.gate_cost_matrix( + self.kf, cost_matrix, tracks, dets, track_indices, + detection_indices) + + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + confirmed_tracks = [ + i for i, t in enumerate(self.tracks) if t.is_confirmed()] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed()] + + # Associate confirmed tracks using appearance features. + matches_a, unmatched_tracks_a, unmatched_detections = \ + linear_assignment.matching_cascade( + gated_metric, self.metric.matching_threshold, self.max_age, + self.tracks, detections, confirmed_tracks) + + # Associate remaining tracks together with unconfirmed tracks using IOU. 
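# Illustrative aside: the per-frame call order expected by the Tracker above. Detections
# would normally come from the segmentation model plus appearance encoder used later in
# this diff; empty lists stand in here just to show the predict/update rhythm.
from deep_sort import nn_matching
from deep_sort.tracker import Tracker

metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.4, budget=None)
tracker = Tracker(metric, max_iou_distance=0.7, max_age=60, n_init=3)
for detections in ([], [], []):                 # one entry per frame
    tracker.predict()                           # always predict before update
    tracker.update(detections)
    live = [t for t in tracker.tracks if t.is_confirmed() and t.time_since_update <= 1]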
+ iou_track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update == 1] + unmatched_tracks_a = [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update != 1] + matches_b, unmatched_tracks_b, unmatched_detections = \ + linear_assignment.min_cost_matching( + iou_matching.iou_cost, self.max_iou_distance, self.tracks, + detections, iou_track_candidates, unmatched_detections) + + matches = matches_a + matches_b + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection): + mean, covariance = self.kf.initiate(detection.to_xyah()) + class_name = detection.get_class() + self.tracks.append(Track( + mean, covariance, self._next_id, self.n_init, self.max_age, + detection.feature, class_name)) + self._next_id += 1 \ No newline at end of file diff --git a/src/tracker/deep_sort_app.py b/src/tracker/deep_sort_app.py new file mode 100644 index 0000000000000000000000000000000000000000..b50fd5f7245e7eb39f8413a5d058bfde12ffbfe9 --- /dev/null +++ b/src/tracker/deep_sort_app.py @@ -0,0 +1,253 @@ +from __future__ import division, print_function, absolute_import + +import argparse +import os + +import cv2 +import numpy as np + +from application_util import preprocessing +from application_util import visualization +from deep_sort import nn_matching +from deep_sort.detection import Detection +from deep_sort.tracker import Tracker + + +def gather_sequence_info(sequence_dir, detection_file): + """Gather sequence information, such as image filenames, detections, + groundtruth (if available). + Parameters + ---------- + sequence_dir : str + Path to the MOTChallenge sequence directory. + detection_file : str + Path to the detection file. + Returns + ------- + Dict + A dictionary of the following sequence information: + * sequence_name: Name of the sequence + * image_filenames: A dictionary that maps frame indices to image + filenames. + * detections: A numpy array of detections in MOTChallenge format. + * groundtruth: A numpy array of ground truth in MOTChallenge format. + * image_size: Image size (height, width). + * min_frame_idx: Index of the first frame. + * max_frame_idx: Index of the last frame. 
+ """ + image_dir = os.path.join(sequence_dir, "img1") + image_filenames = { + int(os.path.splitext(f)[0]): os.path.join(image_dir, f) + for f in os.listdir(image_dir)} + groundtruth_file = os.path.join(sequence_dir, "gt/gt.txt") + + detections = None + if detection_file is not None: + detections = np.load(detection_file) + groundtruth = None + if os.path.exists(groundtruth_file): + groundtruth = np.loadtxt(groundtruth_file, delimiter=',') + + if len(image_filenames) > 0: + image = cv2.imread(next(iter(image_filenames.values())), + cv2.IMREAD_GRAYSCALE) + image_size = image.shape + else: + image_size = None + + if len(image_filenames) > 0: + min_frame_idx = min(image_filenames.keys()) + max_frame_idx = max(image_filenames.keys()) + else: + min_frame_idx = int(detections[:, 0].min()) + max_frame_idx = int(detections[:, 0].max()) + + info_filename = os.path.join(sequence_dir, "seqinfo.ini") + if os.path.exists(info_filename): + with open(info_filename, "r") as f: + line_splits = [l.split('=') for l in f.read().splitlines()[1:]] + info_dict = dict( + s for s in line_splits if isinstance(s, list) and len(s) == 2) + + update_ms = 1000 / int(info_dict["frameRate"]) + else: + update_ms = None + + feature_dim = detections.shape[1] - 10 if detections is not None else 0 + seq_info = { + "sequence_name": os.path.basename(sequence_dir), + "image_filenames": image_filenames, + "detections": detections, + "groundtruth": groundtruth, + "image_size": image_size, + "min_frame_idx": min_frame_idx, + "max_frame_idx": max_frame_idx, + "feature_dim": feature_dim, + "update_ms": update_ms + } + return seq_info + + +def create_detections(detection_mat, frame_idx, min_height=0): + """Create detections for given frame index from the raw detection matrix. + Parameters + ---------- + detection_mat : ndarray + Matrix of detections. The first 10 columns of the detection matrix are + in the standard MOTChallenge detection format. In the remaining columns + store the feature vector associated with each detection. + frame_idx : int + The frame index. + min_height : Optional[int] + A minimum detection bounding box height. Detections that are smaller + than this value are disregarded. + Returns + ------- + List[tracker.Detection] + Returns detection responses at given frame index. + """ + frame_indices = detection_mat[:, 0].astype(np.int) + mask = frame_indices == frame_idx + + detection_list = [] + for row in detection_mat[mask]: + bbox, confidence, feature = row[2:6], row[6], row[10:] + if bbox[3] < min_height: + continue + detection_list.append(Detection(bbox, confidence, feature)) + return detection_list + + +def run(sequence_dir, detection_file, output_file, min_confidence, + nms_max_overlap, min_detection_height, max_cosine_distance, + nn_budget, display): + """Run multi-target tracker on a particular sequence. + Parameters + ---------- + sequence_dir : str + Path to the MOTChallenge sequence directory. + detection_file : str + Path to the detections file. + output_file : str + Path to the tracking output file. This file will contain the tracking + results on completion. + min_confidence : float + Detection confidence threshold. Disregard all detections that have + a confidence lower than this value. + nms_max_overlap: float + Maximum detection overlap (non-maxima suppression threshold). + min_detection_height : int + Detection height threshold. Disregard all detections that have + a height lower than this value. + max_cosine_distance : float + Gating threshold for cosine distance metric (object appearance). 
+ nn_budget : Optional[int] + Maximum size of the appearance descriptor gallery. If None, no budget + is enforced. + display : bool + If True, show visualization of intermediate tracking results. + """ + seq_info = gather_sequence_info(sequence_dir, detection_file) + metric = nn_matching.NearestNeighborDistanceMetric( + "cosine", max_cosine_distance, nn_budget) + tracker = Tracker(metric) + results = [] + + def frame_callback(vis, frame_idx): + print("Processing frame %05d" % frame_idx) + + # Load image and generate detections. + detections = create_detections( + seq_info["detections"], frame_idx, min_detection_height) + detections = [d for d in detections if d.confidence >= min_confidence] + + # Run non-maxima suppression. + boxes = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + indices = preprocessing.non_max_suppression( + boxes, nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + # Update tracker. + tracker.predict() + tracker.update(detections) + + # Update visualization. + if display: + image = cv2.imread( + seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR) + vis.set_image(image.copy()) + vis.draw_detections(detections) + vis.draw_trackers(tracker.tracks) + + # Store results. + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + bbox = track.to_tlwh() + results.append([ + frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]]) + + # Run tracker. + if display: + visualizer = visualization.Visualization(seq_info, update_ms=5) + else: + visualizer = visualization.NoVisualization(seq_info) + visualizer.run(frame_callback) + + # Store results. + f = open(output_file, 'w') + for row in results: + print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % ( + row[0], row[1], row[2], row[3], row[4], row[5]),file=f) + + +def bool_string(input_string): + if input_string not in {"True","False"}: + raise ValueError("Please Enter a valid Ture/False choice") + else: + return (input_string == "True") + +def parse_args(): + """ Parse command line arguments. + """ + parser = argparse.ArgumentParser(description="Deep SORT") + parser.add_argument( + "--sequence_dir", help="Path to MOTChallenge sequence directory", + default=None, required=True) + parser.add_argument( + "--detection_file", help="Path to custom detections.", default=None, + required=True) + parser.add_argument( + "--output_file", help="Path to the tracking output file. This file will" + " contain the tracking results on completion.", + default="/tmp/hypotheses.txt") + parser.add_argument( + "--min_confidence", help="Detection confidence threshold. Disregard " + "all detections that have a confidence lower than this value.", + default=0.8, type=float) + parser.add_argument( + "--min_detection_height", help="Threshold on the detection bounding " + "box height. Detections with height smaller than this value are " + "disregarded", default=0, type=int) + parser.add_argument( + "--nms_max_overlap", help="Non-maxima suppression threshold: Maximum " + "detection overlap.", default=1.0, type=float) + parser.add_argument( + "--max_cosine_distance", help="Gating threshold for cosine distance " + "metric (object appearance).", type=float, default=0.2) + parser.add_argument( + "--nn_budget", help="Maximum size of the appearance descriptors " + "gallery. 
If None, no budget is enforced.", type=int, default=None) + parser.add_argument( + "--display", help="Show intermediate tracking results", + default=True, type=bool_string) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + run( + args.sequence_dir, args.detection_file, args.output_file, + args.min_confidence, args.nms_max_overlap, args.min_detection_height, + args.max_cosine_distance, args.nn_budget, args.display) \ No newline at end of file diff --git a/src/tracker/detect_video_tracker_color.py b/src/tracker/detect_video_tracker_color.py new file mode 100644 index 0000000000000000000000000000000000000000..522c8f9f97e4773440e12e8109803aaca0b29c8d --- /dev/null +++ b/src/tracker/detect_video_tracker_color.py @@ -0,0 +1,215 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +import time +import json +import argparse +import sys, os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +import cv2 +import numpy as np +import shutil +from PIL import Image + +from application_util import preprocessing +from deep_sort import nn_matching +from deep_sort.detection_ import Detection +from deep_sort.tracker import Tracker +from tools import generate_detections +from collections import deque + +from mrcnn.mrcnn_color import MRCNN, isInSide + +out_path = "cailiao" + +if os.path.exists(out_path): + try: + os.remove(out_path) + except: + shutil.rmtree(out_path, ignore_errors=True) +os.makedirs(out_path) + +pts = [deque(maxlen=30) for _ in range(9999)] + +track_total = [] + +total_count = {} + +frameNum = 0 + +# python detect_video_tracker_color.py --video_file test.mp4 --min_score 0.3 --input_size 1024 --model_file model_data/train_mask_rcnn.h5 --model_feature model_data/mars-small128.pb +parser = argparse.ArgumentParser() +parser.add_argument('--video_file', type=str, default='test.mp4', help='data mp4 file.') +parser.add_argument('--min_score', type=float, default=0.3, help='displays the lowest tracking score.') +parser.add_argument('--input_size', type=int, default=1024, help='input pic size.') +parser.add_argument('--model_file', type=str, default='model_data/mask_rcnn_coco.h5', help='Object detection model file.') +parser.add_argument('--model_feature', type=str, default='model_data/market1501.pb', help='target tracking model file.') +ARGS = parser.parse_args() + +box_size = 2 +font_scale = 0.4 + +if __name__ == '__main__': + # Deep SORT + encoder = generate_detections.create_box_encoder(ARGS.model_feature, batch_size=1) + metric = nn_matching.NearestNeighborDistanceMetric("cosine", ARGS.min_score, None) + tracker = Tracker(metric) + + mrcnn = MRCNN(ARGS.model_file, ARGS.input_size, ARGS.min_score) + + video = cv2.VideoCapture(ARGS.video_file) + + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fps = video.get(cv2.CAP_PROP_FPS) + size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))) + video_out = cv2.VideoWriter(out_path + "/outputVideo.mp4", fourcc, fps, size) + + while video.isOpened: + retval, frame = video.read() + if retval: + frame_orig = frame.copy() + else: + break + + prev_time = time.time() + + image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + im_pil = Image.fromarray(image) + boxes, scores, classes, masks, colors = mrcnn.detect_result_(im_pil, ARGS.min_score) + + features = encoder(frame, boxes) + detections = [] + for bbox, score, classe, mask, color, feature in zip(boxes, scores, classes, masks, colors, features): + detections.append(Detection(bbox, score, classe, mask, color, feature)) + + boxes = np.array([d.tlwh for d in 
detections]) + scores = np.array([d.score for d in detections]) + indices = preprocessing.non_max_suppression(boxes, 1.0, scores) + detections = [detections[i] for i in indices] + + detect_count = {} + detect_temp = [] + for det in detections: + y1, x1, y2, x2 = np.array(det.to_tlbr(), dtype=np.int32) + caption = '{} {:.2f}'.format(det.classe, det.score) if det.classe else det.score + + frame = mrcnn.apply_mask(frame, det.mask, det.color, 0.3) + cv2.rectangle(frame, (y1, x1), (y2, x2), det.color, box_size) + + point = (int((y1+y2)/2),int((x1+x2)/2)) + # cv2.circle(frame, point, 1, det.color[3:], box_size) + + cv2.putText( + frame, + caption, + (y1, x1 - 5), + cv2.FONT_HERSHEY_SIMPLEX, + font_scale, det.color, + box_size//2, + lineType=cv2.LINE_AA + ) + + if det.classe not in detect_count: detect_count[det.classe] = 0 + detect_count[det.classe] += 1 + detect_temp.append([det.classe, det.color, point]) + + + tracker.predict() + tracker.update(detections) + + + track_count = 0 + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: continue + y1, x1, y2, x2 = np.array(track.to_tlbr(), dtype=np.int32) + # cv2.rectangle(frame, (y1, x1), (y2, x2), (255, 255, 255), box_size//4) + + + track_total.append(track.track_id) + track_count += 1 + + + point = (int((y1+y2)/2),int((x1+x2)/2)) + # cv2.circle(frame, point, 1, (255, 255, 255), box_size) + pts[track.track_id].append(point) + # [ classe, color , point ] + for d in range(len(detect_temp)): + + if not isInSide(detect_temp[d][2], track.to_tlbr()): continue + + + if detect_temp[d][0] not in total_count: total_count[detect_temp[d][0]] = [0, []] + if track.track_id not in total_count[detect_temp[d][0]][1]: + total_count[detect_temp[d][0]][0] += 1 + total_count[detect_temp[d][0]][1].append(track.track_id) + + label_path = os.path.join(out_path, "{0}/{1}".format('imageSeg', detect_temp[d][0])) + if not os.path.exists(label_path): os.makedirs(label_path) + # cv2.imwrite("{0}/{1}.jpg".format(label_path, track.track_id), frame_orig[x1:x2, y1:y2]) + + + cv2.putText( + frame, + "No. 
" + str(track.track_id), + (y1, x1 - 15), + cv2.FONT_HERSHEY_SIMPLEX, + font_scale, (255, 255, 255), + box_size//2, + lineType=cv2.LINE_AA + ) + + + for j in range(1, len(pts[track.track_id])): + if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None: continue + thickness = int(np.sqrt(64 / float(j + 1)) * 2) + cv2.line(frame, (pts[track.track_id][j-1]), (pts[track.track_id][j]), detect_temp[d][1], thickness) + + + trackTotalStr = 'Track Total: %s' % str(len(set(track_total))) + cv2.putText(frame, trackTotalStr, (20,20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (244, 67, 54), 1, cv2.LINE_AA) + + + trackCountStr = 'Track Count: %s' % str(track_count) + cv2.putText(frame, trackCountStr, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 193, 7), 1, cv2.LINE_AA) + + + totalStr = "" + for k in detect_count.keys(): totalStr += '%s: %d ' % (k, detect_count[k]) + cv2.putText(frame, totalStr, (20, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (50, 0, 255), 1, cv2.LINE_AA) + + for i, label in enumerate(total_count): + labelTotal = '%s: %d ' % (label, total_count[label][0]) + cv2.putText(frame, labelTotal, (20, 80 + 20 * i), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 87, 34), 1, cv2.LINE_AA) + + + curr_time = time.time() + exec_time = curr_time - prev_time + print("{:.0f}/{:.0f} , {:.2f} ms".format(frameNum, video.get(7), 1000*exec_time)) + + frameNum += 1 + + video_out.write(frame) + + # result = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # cv2.namedWindow("video_reult", cv2.WINDOW_AUTOSIZE) + # cv2.imshow("video_reult", result) + + # if cv2.waitKey(1) & 0xFF == ord('q'): break + + + video.release() + video_out.release() + # cv2.destroyAllWindows() + + + totalFile = open(out_path + "/totalCount.txt","w") + + for label in total_count.keys(): + labelTotal = "{0}:{1} \n".format(label, total_count[label][0]) + totalFile.write(labelTotal) + + totalFile.close() + + + with open(out_path + "/totalCount.json", 'w') as tc: + json.dump(total_count, tc) diff --git a/src/tracker/detect_video_tracker_colors.py b/src/tracker/detect_video_tracker_colors.py new file mode 100644 index 0000000000000000000000000000000000000000..4c15240728ebb0048eb5e32fea1339cd38018b4c --- /dev/null +++ b/src/tracker/detect_video_tracker_colors.py @@ -0,0 +1,193 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +import time +import json +import argparse +import sys, os +import cv2 +import numpy as np +from PIL import Image +from moxing.framework import file + +from deep_sort import preprocessing +from deep_sort import nn_matching +from deep_sort.detection import Detection +from deep_sort.tracker import Tracker +from deep_sort import generate_detections +from collections import deque + +from mrcnn.mrcnn_colors import MRCNN, isInSide + +obs_path = "obs://puddings/deep-sort-mask-rcnn/cailiao" + +out_path = "cailiao" + +if os.path.exists(out_path): + file.remove(out_path, recursive=True) +os.makedirs(out_path) + +pts = [deque(maxlen=30) for _ in range(9999)] + +track_total = [] + +total_count = {} + +frameNum = 0 + +# python detect_video_tracker_colors.py --video_file test.mp4 --min_score 0.3 --input_size 1024 --model_file model_data/train_mask_rcnn.h5 --model_feature model_data/mars-small128.pb +parser = argparse.ArgumentParser() +parser.add_argument('--video_file', type=str, default='test.mp4', help='data mp4 file.') +parser.add_argument('--min_score', type=float, default=0.3, help='displays the lowest tracking score.') +parser.add_argument('--input_size', type=int, default=1024, help='input pic size.') +parser.add_argument('--model_file', type=str, default='model_data/mask_rcnn_coco.h5', help='Object detection model file.') +parser.add_argument('--model_feature', type=str, default='model_data/market1501.pb', help='target tracking model file.') +ARGS = parser.parse_args() + +box_size = 2 +font_scale = 0.4 + +if __name__ == '__main__': + # Deep SORT + encoder = generate_detections.create_box_encoder(ARGS.model_feature, batch_size=1) + metric = nn_matching.NearestNeighborDistanceMetric("cosine", ARGS.min_score, None) + tracker = Tracker(metric) + + mrcnn = MRCNN(ARGS.model_file, ARGS.input_size, ARGS.min_score) + + video = cv2.VideoCapture(ARGS.video_file) + + fourcc = cv2.VideoWriter_fourcc(*'XVID') + fps = video.get(cv2.CAP_PROP_FPS) + size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))) + video_out = cv2.VideoWriter(out_path + "/outputVideo.mp4", fourcc, fps, size) + + while video.isOpened(): + retval, frame = video.read() + if retval: + frame_orig = frame.copy() + else: + print("No frame read! Try a different video.") + break + + prev_time = time.time() + + boxes, scores, classes, masks, colors = mrcnn.detect_result(frame, ARGS.min_score) + + features = encoder(frame, boxes) + detections = [] + for bbox, score, classe, mask, color, feature in zip(boxes, scores, classes, masks, colors, features): + detections.append(Detection(bbox, score, classe, mask, color, feature)) + + boxes = np.array([d.tlwh for d in detections]) + scores = np.array([d.score for d in detections]) + indices = preprocessing.non_max_suppression(boxes, 1.0, scores) + detections = [detections[i] for i in indices] + + detect_count = {} + detect_temp = [] + for det in detections: + y1, x1, y2, x2 = np.array(det.to_tlbr(), dtype=np.int32) + caption = '{} {:.2f}'.format(det.classe, det.score) if det.classe else det.score + + frame = mrcnn.apply_mask(frame, det.mask, det.color, 0.3) + for c in range(3): det.color += (int(det.color[c]*255),) + cv2.rectangle(frame, (y1, x1), (y2, x2), det.color[3:], box_size) + + point = (int((y1+y2)/2),int((x1+x2)/2)) + # cv2.circle(frame, point, 1, det.color[3:], box_size) + + cv2.putText( + frame, + caption, + (y1, x1 - 5), + cv2.FONT_HERSHEY_SIMPLEX, + font_scale, det.color[3:], + box_size//2, + lineType=cv2.LINE_AA + ) + if det.classe not in
detect_count: detect_count[det.classe] = 0 + detect_count[det.classe] += 1 + detect_temp.append([det.classe, det.color[3:], point]) + + tracker.predict() + tracker.update(detections) + + track_count = 0 + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: continue + y1, x1, y2, x2 = np.array(track.to_tlbr(), dtype=np.int32) + # cv2.rectangle(frame, (y1, x1), (y2, x2), (255, 255, 255), box_size//4) + + track_total.append(track.track_id) + track_count += 1 + + point = (int((y1+y2)/2),int((x1+x2)/2)) + # cv2.circle(frame, point, 1, (255, 255, 255), box_size) + pts[track.track_id].append(point) + # [ classe, color , point ] + for d in range(len(detect_temp)): + if not isInSide(detect_temp[d][2], track.to_tlbr()): continue + + if detect_temp[d][0] not in total_count: total_count[detect_temp[d][0]] = [0, []] + if track.track_id not in total_count[detect_temp[d][0]][1]: + total_count[detect_temp[d][0]][0] += 1 + total_count[detect_temp[d][0]][1].append(track.track_id) + label_path = os.path.join(out_path, "{0}/{1}".format('imageSeg', detect_temp[d][0])) + if not os.path.exists(label_path): os.makedirs(label_path) + cv2.imwrite("{0}/{1}.jpg".format(label_path, track.track_id), frame_orig[x1:x2, y1:y2]) + + cv2.putText( + frame, + "No. " + str(track.track_id), + (y1, x1 - 15), + cv2.FONT_HERSHEY_SIMPLEX, + font_scale, (255, 255, 255), + box_size//2, + lineType=cv2.LINE_AA + ) + + for j in range(1, len(pts[track.track_id])): + if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None: continue + thickness = int(np.sqrt(64 / float(j + 1)) * 2) + cv2.line(frame, (pts[track.track_id][j-1]), (pts[track.track_id][j]), detect_temp[d][1], thickness) + + trackTotalStr = 'Track Total: %s' % str(len(set(track_total))) + cv2.putText(frame, trackTotalStr, (20,20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (244, 67, 54), 1, cv2.LINE_AA) + + trackCountStr = 'Track Count: %s' % str(track_count) + cv2.putText(frame, trackCountStr, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 193, 7), 1, cv2.LINE_AA) + + totalStr = "" + for k in detect_count.keys(): totalStr += '%s: %d ' % (k, detect_count[k]) + cv2.putText(frame, totalStr, (20, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (50, 0, 255), 1, cv2.LINE_AA) + + for i, label in enumerate(total_count): + labelTotal = '%s: %d ' % (label, total_count[label][0]) + cv2.putText(frame, labelTotal, (20, 80 + 20 * i), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 87, 34), 1, cv2.LINE_AA) + + curr_time = time.time() + exec_time = curr_time - prev_time + print("Frame {:.0f}/{:.0f} , inference time: {:.2f} ms".format(frameNum, video.get(7), 1000*exec_time)) + + frameNum += 1 + video_out.write(frame) + # result = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # cv2.namedWindow("video_result", cv2.WINDOW_AUTOSIZE) + # cv2.imshow("video_result", result) + # if cv2.waitKey(1) & 0xFF == ord('q'): break + + video.release() + video_out.release() + # cv2.destroyAllWindows() + + totalFile = open(out_path + "/totalCount.txt","w") + for label in total_count.keys(): + labelTotal = "{0}:{1} \n".format(label, total_count[label][0]) + totalFile.write(labelTotal) + totalFile.close() + + with open(out_path + "/totalCount.json", 'w') as tc: + json.dump(total_count, tc) + + file.copy_parallel(out_path, obs_path) \ No newline at end of file diff --git a/src/tracker/evaluate_motchallenge.py b/src/tracker/evaluate_motchallenge.py new file mode 100644 index 0000000000000000000000000000000000000000..7fd4155f00095c424e28c156b1ecad9bf55792bd --- /dev/null +++ b/src/tracker/evaluate_motchallenge.py @@
-0,0 +1,52 @@ +import argparse +import os +import deep_sort_app + + +def parse_args(): + """ Parse command line arguments. + """ + parser = argparse.ArgumentParser(description="MOTChallenge evaluation") + parser.add_argument( + "--mot_dir", help="Path to MOTChallenge directory (train or test)", + required=True) + parser.add_argument( + "--detection_dir", help="Path to detections.", default="detections", + required=True) + parser.add_argument( + "--output_dir", help="Folder in which the results will be stored. Will " + "be created if it does not exist.", default="results") + parser.add_argument( + "--min_confidence", help="Detection confidence threshold. Disregard " + "all detections that have a confidence lower than this value.", + default=0.0, type=float) + parser.add_argument( + "--min_detection_height", help="Threshold on the detection bounding " + "box height. Detections with height smaller than this value are " + "disregarded", default=0, type=int) + parser.add_argument( + "--nms_max_overlap", help="Non-maxima suppression threshold: Maximum " + "detection overlap.", default=1.0, type=float) + parser.add_argument( + "--max_cosine_distance", help="Gating threshold for cosine distance " + "metric (object appearance).", type=float, default=0.2) + parser.add_argument( + "--nn_budget", help="Maximum size of the appearance descriptors " + "gallery. If None, no budget is enforced.", type=int, default=100) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + sequences = os.listdir(args.mot_dir) + for sequence in sequences: + print("Running sequence %s" % sequence) + sequence_dir = os.path.join(args.mot_dir, sequence) + detection_file = os.path.join(args.detection_dir, "%s.npy" % sequence) + output_file = os.path.join(args.output_dir, "%s.txt" % sequence) + deep_sort_app.run( + sequence_dir, detection_file, output_file, args.min_confidence, + args.nms_max_overlap, args.min_detection_height, + args.max_cosine_distance, args.nn_budget, display=False) \ No newline at end of file diff --git a/src/tracker/generate_videos.py b/src/tracker/generate_videos.py new file mode 100644 index 0000000000000000000000000000000000000000..513e616805994da3f3002c3ea5807809a1e0e81e --- /dev/null +++ b/src/tracker/generate_videos.py @@ -0,0 +1,64 @@ +import os +import argparse +import show_results + + +def convert(filename_in, filename_out, ffmpeg_executable="ffmpeg"): + import subprocess + command = [ffmpeg_executable, "-i", filename_in, "-c:v", "libx264", + "-preset", "slow", "-crf", "21", filename_out] + subprocess.call(command) + + +def parse_args(): + """ Parse command line arguments. + """ + parser = argparse.ArgumentParser(description="Siamese Tracking") + parser.add_argument( + "--mot_dir", help="Path to MOTChallenge directory (train or test)", + required=True) + parser.add_argument( + "--result_dir", help="Path to the folder with tracking output.", + required=True) + parser.add_argument( + "--output_dir", help="Folder to store the videos in. Will be created " + "if it does not exist.", + required=True) + parser.add_argument( + "--convert_h264", help="If true, convert videos to libx264 (requires " + "FFMPEG", default=False) + parser.add_argument( + "--update_ms", help="Time between consecutive frames in milliseconds. 
" + "Defaults to the frame_rate specified in seqinfo.ini, if available.", + default=None) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + for sequence_txt in os.listdir(args.result_dir): + sequence = os.path.splitext(sequence_txt)[0] + sequence_dir = os.path.join(args.mot_dir, sequence) + if not os.path.exists(sequence_dir): + continue + result_file = os.path.join(args.result_dir, sequence_txt) + update_ms = args.update_ms + video_filename = os.path.join(args.output_dir, "%s.avi" % sequence) + + print("Saving %s to %s." % (sequence_txt, video_filename)) + show_results.run( + sequence_dir, result_file, False, None, update_ms, video_filename) + + if not args.convert_h264: + import sys + sys.exit() + for sequence_txt in os.listdir(args.result_dir): + sequence = os.path.splitext(sequence_txt)[0] + sequence_dir = os.path.join(args.mot_dir, sequence) + if not os.path.exists(sequence_dir): + continue + filename_in = os.path.join(args.output_dir, "%s.avi" % sequence) + filename_out = os.path.join(args.output_dir, "%s.mp4" % sequence) + convert(filename_in, filename_out) \ No newline at end of file diff --git a/src/tracker/main.py b/src/tracker/main.py new file mode 100644 index 0000000000000000000000000000000000000000..ed89c9c04558b8190cab3e8a391525b9c5d16eea --- /dev/null +++ b/src/tracker/main.py @@ -0,0 +1,218 @@ +import argparse +import os + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + +import time +import tensorflow as tf + +physical_devices = tf.config.experimental.list_physical_devices('GPU') +if len(physical_devices) > 0: + tf.config.experimental.set_memory_growth(physical_devices[0], True) + +from PIL import Image +import cv2 +import numpy as np +import matplotlib.pyplot as plt +# deep sort imports +from deep_sort import nn_matching +from application_util import preprocessing +from deep_sort.detection import Detection +from deep_sort.tracker import Tracker +from _tools_ import generate_detections as gdet +# deepsort +from mrcnn.mrcnn_color import MRCNN +# ocr +# from sts.demo.sts import handle_sts + +def _parse_args(): + parser = argparse.ArgumentParser(description="") + + parser.add_argument("--model", + help="detection model", + type=str, + default="./checkpoint/maskrcnn_signboard_ss.ckpt") + parser.add_argument("--input_size", + help="input size", + type=int, + default=1024) + parser.add_argument("--score", + help="score threshold", + type=float, + default=0.50) + parser.add_argument("--size", + help="resize images to", + type=int, + default=1024) + parser.add_argument("--video", + help="path to input video or set to 0 for webcam", + type=str, + default="./samples/demo.mp4") + parser.add_argument("--output", + help="path to output video", + type=str, + default="./outputs/demo.mp4") + parser.add_argument("--output_format", + help="codec used in VideoWriter when saving video to file", + type=str, + default='mp4v') + parser.add_argument("--dont_show", + help="dont show video output", + type=bool, + default=True) + parser.add_argument("--info", + help="show detailed info of tracked objects", + type=bool, + default=True) + parser.add_argument("--count", + help="count objects being tracked on screen", + type=bool, + default=True) + + args = parser.parse_args() + return args + +def handle(args): + # Definition of the parameters + max_cosine_distance = 0.4 + nn_budget = None + nms_max_overlap = 1.0 + + # initialize deep sort + model_filename = 'checkpoint/signboard_2793.pb' + encoder = 
gdet.create_box_encoder(model_filename, batch_size=1) + # calculate cosine distance metric + metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) + # initialize tracker + tracker = Tracker(metric) + + # initialize maskrcnn + mrcnn = MRCNN(args.model, args.input_size, args.score) + + # load configuration for object detector + video_path = args.video + + # begin video capture + try: + vid = cv2.VideoCapture(int(video_path)) + except: + vid = cv2.VideoCapture(video_path) + + out = None + + # get video ready to save locally if flag is set + if args.output: + # by default VideoCapture returns float instead of int + width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(vid.get(cv2.CAP_PROP_FPS)) + codec = cv2.VideoWriter_fourcc(*args.output_format) + out = cv2.VideoWriter(args.output, codec, fps, (width, height)) + + frame_num = 0 + # while video is running + while True: + return_value, frame = vid.read() + if return_value: + image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + image = Image.fromarray(image) + else: + print('Video has ended or failed, try a different video format!') + break + frame_num +=1 + print('Frame #: ', frame_num) + start_time = time.time() + + boxes, scores, class_names, class_ids, class_color = mrcnn.detect_result_(image, min_score=0.5) + + count = len(class_names) + + if args.count: + cv2.putText(frame, "Objects being tracked: {0}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) + print("Objects being tracked: {0}".format(count)) + + # encode yolo detections and feed to tracker + features = encoder(frame, boxes) + detections = [Detection(box, score, class_name, feature) for box, score, class_name, feature in zip(boxes, scores, class_names, features)] + + #initialize color map + cmap = plt.get_cmap('tab20b') + colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] + + # run non-maxima supression + boxs = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + classes = np.array([d.class_name for d in detections]) + indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + # Call the tracker + tracker.predict() + tracker.update(detections) + + # update tracks + with open("./outputs/{}.txt".format(frame_num), "a+", encoding="utf-8") as ff: + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + bbox = track.to_tlbr() + + # crop to ids folder + ids_path = "./ids/"+str(track.track_id) + if not os.path.isdir(ids_path): + os.mkdir(ids_path) + crop_ids = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] + num_ids = 0 + + while os.path.isfile(os.path.join(ids_path, str(track.track_id) + "_" + str(frame_num) + "_" + str(num_ids)+".png")): + num_ids += 1 + final_ids_path = os.path.join(ids_path, str(track.track_id) + "_" + str(frame_num) + "_" + str(num_ids)+".png") + cv2.imwrite(final_ids_path, crop_ids) + + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + bbox = track.to_tlbr() + class_name = track.get_class() + + # predict ocr + crop_ids = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] + dict_box_sign_out, dict_rec_sign_out = [], [] # handle_sts(crop_ids) + # draw bbox on screen + color = colors[int(track.track_id) % len(colors)] + color = [i * 255 for i in color] + cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), 
(int(bbox[2]), int(bbox[3])), color, 2) + cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1) + cv2.putText(frame, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) + + dict_rec_sign_out_join = "_".join(dict_rec_sign_out) + cv2.putText(frame, dict_rec_sign_out_join, (int(bbox[0]), int(bbox[1]+20)), 0, 0.75, (255, 255, 255), 2) + + # if enable info flag then print details about each track + if args.info: + print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) + ff.write("{}, {}, {}, {}, {}, {}\n".format(str(track.track_id), int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), dict_rec_sign_out_join)) + ff.close() + + # calculate frames per second of running detections + fps = 1.0 / (time.time() - start_time) + print("FPS: %.2f" % fps) + result = frame + + if not args.dont_show: + cv2.imshow("Output Video", result) + + # if output flag is set, save video file + if args.output: + cv2.imwrite("./outputs/{0}.jpg".format(frame_num), result) + out.write(result) + if cv2.waitKey(1) & 0xFF == ord('q'): break + cv2.destroyAllWindows() + +def main(): + args = _parse_args() + handle(args) + +if __name__ == '__main__': + main() + \ No newline at end of file diff --git a/src/tracker/mask_rcnn/__init__.py b/src/tracker/mask_rcnn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/tracker/mask_rcnn/config.py b/src/tracker/mask_rcnn/config.py new file mode 100644 index 0000000000000000000000000000000000000000..b652b56ff69c7ebea135d6ed2b24464ecc0cbdc4 --- /dev/null +++ b/src/tracker/mask_rcnn/config.py @@ -0,0 +1,235 @@ +""" +Mask R-CNN +Base Configurations class. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +""" + +import numpy as np + + +# Base Configuration Class +# Don't use this class directly. Instead, sub-class it and override +# the configurations you need to change. + +class Config(object): + """Base configuration class. For custom configurations, create a + sub-class that inherits from this one and override properties + that need to be changed. + """ + # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. + # Useful if your code needs to do things differently depending on which + # experiment is running. + NAME = None # Override in sub-classes + + # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1. + GPU_COUNT = 1 + + # Number of images to train with on each GPU. A 12GB GPU can typically + # handle 2 images of 1024x1024px. + # Adjust based on your GPU memory and image sizes. Use the highest + # number that your GPU can handle for best performance. + IMAGES_PER_GPU = 2 + + # Number of training steps per epoch + # This doesn't need to match the size of the training set. Tensorboard + # updates are saved at the end of each epoch, so setting this to a + # smaller number means getting more frequent TensorBoard updates. + # Validation stats are also calculated at each epoch end and they + # might take a while, so don't set this too small to avoid spending + # a lot of time on validation stats. + STEPS_PER_EPOCH = 1000 + + # Number of validation steps to run at the end of every training epoch. 
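+    # Each validation step evaluates one batch of BATCH_SIZE (= IMAGES_PER_GPU * GPU_COUNT) images.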
+ # A bigger number improves accuracy of validation stats, but slows + # down the training. + VALIDATION_STEPS = 50 + + # Backbone network architecture + # Supported values are: resnet50, resnet101. + # You can also provide a callable that should have the signature + # of model.resnet_graph. If you do so, you need to supply a callable + # to COMPUTE_BACKBONE_SHAPE as well + BACKBONE = "resnet101" + + # Only useful if you supply a callable to BACKBONE. Should compute + # the shape of each layer of the FPN Pyramid. + # See model.compute_backbone_shapes + COMPUTE_BACKBONE_SHAPE = None + + # The strides of each layer of the FPN Pyramid. These values + # are based on a Resnet101 backbone. + BACKBONE_STRIDES = [4, 8, 16, 32, 64] + + # Size of the fully-connected layers in the classification graph + FPN_CLASSIF_FC_LAYERS_SIZE = 1024 + + # Size of the top-down layers used to build the feature pyramid + TOP_DOWN_PYRAMID_SIZE = 256 + + # Number of classification classes (including background) + NUM_CLASSES = 1 # Override in sub-classes + + # Length of square anchor side in pixels + RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) + + # Ratios of anchors at each cell (width/height) + # A value of 1 represents a square anchor, and 0.5 is a wide anchor + RPN_ANCHOR_RATIOS = [0.5, 1, 2] + + # Anchor stride + # If 1 then anchors are created for each cell in the backbone feature map. + # If 2, then anchors are created for every other cell, and so on. + RPN_ANCHOR_STRIDE = 1 + + # Non-max suppression threshold to filter RPN proposals. + # You can increase this during training to generate more propsals. + RPN_NMS_THRESHOLD = 0.7 + + # How many anchors per image to use for RPN training + RPN_TRAIN_ANCHORS_PER_IMAGE = 256 + + # ROIs kept after tf.nn.top_k and before non-maximum suppression + PRE_NMS_LIMIT = 6000 + + # ROIs kept after non-maximum suppression (training and inference) + POST_NMS_ROIS_TRAINING = 2000 + POST_NMS_ROIS_INFERENCE = 1000 + + # If enabled, resizes instance masks to a smaller size to reduce + # memory load. Recommended when using high-resolution images. + USE_MINI_MASK = True + MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask + + # Input image resizing + # Generally, use the "square" resizing mode for training and predicting + # and it should work well in most cases. In this mode, images are scaled + # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the + # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is + # padded with zeros to make it a square so multiple images can be put + # in one batch. + # Available resizing modes: + # none: No resizing or padding. Return the image unchanged. + # square: Resize and pad with zeros to get a square image + # of size [max_dim, max_dim]. + # pad64: Pads width and height with zeros to make them multiples of 64. + # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales + # up before padding. IMAGE_MAX_DIM is ignored in this mode. + # The multiple of 64 is needed to ensure smooth scaling of feature + # maps up and down the 6 levels of the FPN pyramid (2**6=64). + # crop: Picks random crops from the image. First, scales the image based + # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of + # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. + # IMAGE_MAX_DIM is not used in this mode. + IMAGE_RESIZE_MODE = "square" + IMAGE_MIN_DIM = 800 + IMAGE_MAX_DIM = 1024 + # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further + # up scaling. 
For example, if set to 2 then images are scaled up to double + # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. + # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. + IMAGE_MIN_SCALE = 0 + # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4 + # Changing this requires other changes in the code. See the WIKI for more + # details: https://github.com/matterport/Mask_RCNN/wiki + IMAGE_CHANNEL_COUNT = 3 + + # Image mean (RGB) + MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) + + # Number of ROIs per image to feed to classifier/mask heads + # The Mask RCNN paper uses 512 but often the RPN doesn't generate + # enough positive proposals to fill this and keep a positive:negative + # ratio of 1:3. You can increase the number of proposals by adjusting + # the RPN NMS threshold. + TRAIN_ROIS_PER_IMAGE = 200 + + # Percent of positive ROIs used to train classifier/mask heads + ROI_POSITIVE_RATIO = 0.33 + + # Pooled ROIs + POOL_SIZE = 7 + MASK_POOL_SIZE = 14 + + # Shape of output mask + # To change this you also need to change the neural network mask branch + MASK_SHAPE = [28, 28] + + # Maximum number of ground truth instances to use in one image + MAX_GT_INSTANCES = 100 + + # Bounding box refinement standard deviation for RPN and final detections. + RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) + BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) + + # Max number of final detections + DETECTION_MAX_INSTANCES = 100 + + # Minimum probability value to accept a detected instance + # ROIs below this threshold are skipped + DETECTION_MIN_CONFIDENCE = 0.7 + + # Non-maximum suppression threshold for detection + DETECTION_NMS_THRESHOLD = 0.3 + + # Learning rate and momentum + # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes + # weights to explode. Likely due to differences in optimizer + # implementation. + LEARNING_RATE = 0.001 + LEARNING_MOMENTUM = 0.9 + + # Weight decay regularization + WEIGHT_DECAY = 0.0001 + + # Loss weights for more precise optimization. + # Can be used for R-CNN training setup. + LOSS_WEIGHTS = { + "rpn_class_loss": 1., + "rpn_bbox_loss": 1., + "mrcnn_class_loss": 1., + "mrcnn_bbox_loss": 1., + "mrcnn_mask_loss": 1. + } + + # Use RPN ROIs or externally generated ROIs for training + # Keep this True for most situations. Set to False if you want to train + # the head branches on ROI generated by code rather than the ROIs from + # the RPN. For example, to debug the classifier head without having to + # train the RPN. + USE_RPN_ROIS = True + + # Train or freeze batch normalization layers + # None: Train BN layers. This is the normal mode + # False: Freeze BN layers. Good when using a small batch size + # True: (don't use). 
Set layer in training mode even when predicting + TRAIN_BN = False # Defaulting to False since batch size is often small + + # Gradient norm clipping + GRADIENT_CLIP_NORM = 5.0 + + def __init__(self): + """Set values of computed attributes.""" + # Effective batch size + self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT + + # Input image size + if self.IMAGE_RESIZE_MODE == "crop": + self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, + self.IMAGE_CHANNEL_COUNT]) + else: + self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, + self.IMAGE_CHANNEL_COUNT]) + + # Image meta data length + # See compose_image_meta() for details + self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES + + def display(self): + """Display Configuration values.""" + print("\nConfigurations:") + for a in dir(self): + if not a.startswith("__") and not callable(getattr(self, a)): + print("{:30} {}".format(a, getattr(self, a))) + print("\n") \ No newline at end of file diff --git a/src/tracker/mask_rcnn/model.py b/src/tracker/mask_rcnn/model.py new file mode 100644 index 0000000000000000000000000000000000000000..ed775dbd28f3c7b9b6bc62b3702b2a4b6d293d0d --- /dev/null +++ b/src/tracker/mask_rcnn/model.py @@ -0,0 +1,2804 @@ +""" +Mask R-CNN +The main Mask R-CNN model implementation. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +""" + +import os +import random +import datetime +import re +import math +import logging +from collections import OrderedDict +import multiprocessing +import numpy as np +import tensorflow as tf +import keras +import keras.backend as K +import keras.layers as KL +import keras.engine as KE +import keras.models as KM + +from mrcnn import utils + +# Requires TensorFlow 1.3+ and Keras 2.0.8+. +from distutils.version import LooseVersion +assert LooseVersion(tf.__version__) >= LooseVersion("1.3") +assert LooseVersion(keras.__version__) >= LooseVersion('2.0.8') + + +############################################################ +# Utility Functions +############################################################ + +def log(text, array=None): + """Prints a text message. And, optionally, if a Numpy array is provided it + prints it's shape, min, and max values. + """ + if array is not None: + text = text.ljust(25) + text += ("shape: {:20} ".format(str(array.shape))) + if array.size: + text += ("min: {:10.5f} max: {:10.5f}".format(array.min(),array.max())) + else: + text += ("min: {:10} max: {:10}".format("","")) + text += " {}".format(array.dtype) + print(text) + + +class BatchNorm(KL.BatchNormalization): + """Extends the Keras BatchNormalization class to allow a central place + to make changes if needed. + Batch normalization has a negative effect on training if batches are small + so this layer is often frozen (via setting in Config class) and functions + as linear layer. + """ + def call(self, inputs, training=None): + """ + Note about training values: + None: Train BN layers. This is the normal mode + False: Freeze BN layers. Good when batch size is small + True: (don't use). Set layer in training mode even when making inferences + """ + return super(self.__class__, self).call(inputs, training=training) + + +def compute_backbone_shapes(config, image_shape): + """Computes the width and height of each stage of the backbone network. + Returns: + [N, (height, width)]. 
Where N is the number of stages + """ + if callable(config.BACKBONE): + return config.COMPUTE_BACKBONE_SHAPE(image_shape) + + # Currently supports ResNet only + assert config.BACKBONE in ["resnet50", "resnet101"] + return np.array( + [[int(math.ceil(image_shape[0] / stride)), + int(math.ceil(image_shape[1] / stride))] + for stride in config.BACKBONE_STRIDES]) + + +############################################################ +# Resnet Graph +############################################################ + +# Code adopted from: +# https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py + +def identity_block(input_tensor, kernel_size, filters, stage, block, + use_bias=True, train_bn=True): + """The identity_block is the block that has no conv layer at shortcut + # Arguments + input_tensor: input tensor + kernel_size: default 3, the kernel size of middle conv layer at main path + filters: list of integers, the nb_filters of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + use_bias: Boolean. To use or not use a bias in conv layers. + train_bn: Boolean. Train or freeze Batch Norm layers + """ + nb_filter1, nb_filter2, nb_filter3 = filters + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', + use_bias=use_bias)(input_tensor) + x = BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', + name=conv_name_base + '2b', use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', + use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) + + x = KL.Add()([x, input_tensor]) + x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) + return x + + +def conv_block(input_tensor, kernel_size, filters, stage, block, + strides=(2, 2), use_bias=True, train_bn=True): + """conv_block is the block that has a conv layer at shortcut + # Arguments + input_tensor: input tensor + kernel_size: default 3, the kernel size of middle conv layer at main path + filters: list of integers, the nb_filters of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + use_bias: Boolean. To use or not use a bias in conv layers. + train_bn: Boolean. 
Train or freeze Batch Norm layers + Note that from stage 3, the first conv layer at main path is with subsample=(2,2) + And the shortcut should have subsample=(2,2) as well + """ + nb_filter1, nb_filter2, nb_filter3 = filters + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, + name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) + x = BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', + name=conv_name_base + '2b', use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + + '2c', use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) + + shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, + name=conv_name_base + '1', use_bias=use_bias)(input_tensor) + shortcut = BatchNorm(name=bn_name_base + '1')(shortcut, training=train_bn) + + x = KL.Add()([x, shortcut]) + x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) + return x + + +def resnet_graph(input_image, architecture, stage5=False, train_bn=True): + """Build a ResNet graph. + architecture: Can be resnet50 or resnet101 + stage5: Boolean. If False, stage5 of the network is not created + train_bn: Boolean. Train or freeze Batch Norm layers + """ + assert architecture in ["resnet50", "resnet101"] + # Stage 1 + x = KL.ZeroPadding2D((3, 3))(input_image) + x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x) + x = BatchNorm(name='bn_conv1')(x, training=train_bn) + x = KL.Activation('relu')(x) + C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) + # Stage 2 + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), train_bn=train_bn) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', train_bn=train_bn) + C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', train_bn=train_bn) + # Stage 3 + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', train_bn=train_bn) + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', train_bn=train_bn) + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', train_bn=train_bn) + C3 = x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', train_bn=train_bn) + # Stage 4 + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', train_bn=train_bn) + block_count = {"resnet50": 5, "resnet101": 22}[architecture] + for i in range(block_count): + x = identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i), train_bn=train_bn) + C4 = x + # Stage 5 + if stage5: + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', train_bn=train_bn) + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', train_bn=train_bn) + C5 = x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', train_bn=train_bn) + else: + C5 = None + return [C1, C2, C3, C4, C5] + + +############################################################ +# Proposal Layer +############################################################ + +def apply_box_deltas_graph(boxes, deltas): + """Applies the given deltas to the given boxes. 
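+    Deltas are (dy, dx, log(dh), log(dw)); the box center is shifted by (dy * height, dx * width)
+    and the box size is scaled by (exp(log(dh)), exp(log(dw))), as implemented below.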
+ boxes: [N, (y1, x1, y2, x2)] boxes to update + deltas: [N, (dy, dx, log(dh), log(dw))] refinements to apply + """ + # Convert to y, x, h, w + height = boxes[:, 2] - boxes[:, 0] + width = boxes[:, 3] - boxes[:, 1] + center_y = boxes[:, 0] + 0.5 * height + center_x = boxes[:, 1] + 0.5 * width + # Apply deltas + center_y += deltas[:, 0] * height + center_x += deltas[:, 1] * width + height *= tf.exp(deltas[:, 2]) + width *= tf.exp(deltas[:, 3]) + # Convert back to y1, x1, y2, x2 + y1 = center_y - 0.5 * height + x1 = center_x - 0.5 * width + y2 = y1 + height + x2 = x1 + width + result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out") + return result + + +def clip_boxes_graph(boxes, window): + """ + boxes: [N, (y1, x1, y2, x2)] + window: [4] in the form y1, x1, y2, x2 + """ + # Split + wy1, wx1, wy2, wx2 = tf.split(window, 4) + y1, x1, y2, x2 = tf.split(boxes, 4, axis=1) + # Clip + y1 = tf.maximum(tf.minimum(y1, wy2), wy1) + x1 = tf.maximum(tf.minimum(x1, wx2), wx1) + y2 = tf.maximum(tf.minimum(y2, wy2), wy1) + x2 = tf.maximum(tf.minimum(x2, wx2), wx1) + clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes") + clipped.set_shape((clipped.shape[0], 4)) + return clipped + + +class ProposalLayer(KE.Layer): + """Receives anchor scores and selects a subset to pass as proposals + to the second stage. Filtering is done based on anchor scores and + non-max suppression to remove overlaps. It also applies bounding + box refinement deltas to anchors. + Inputs: + rpn_probs: [batch, num_anchors, (bg prob, fg prob)] + rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))] + anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates + Returns: + Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)] + """ + + def __init__(self, proposal_count, nms_threshold, config=None, **kwargs): + super(ProposalLayer, self).__init__(**kwargs) + self.config = config + self.proposal_count = proposal_count + self.nms_threshold = nms_threshold + + def call(self, inputs): + # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1] + scores = inputs[0][:, :, 1] + # Box deltas [batch, num_rois, 4] + deltas = inputs[1] + deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) + # Anchors + anchors = inputs[2] + + # Improve performance by trimming to top anchors by score + # and doing the rest on the smaller subset. + pre_nms_limit = tf.minimum(self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1]) + ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, + name="top_anchors").indices + scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y), + self.config.IMAGES_PER_GPU) + deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y), + self.config.IMAGES_PER_GPU) + pre_nms_anchors = utils.batch_slice([anchors, ix], lambda a, x: tf.gather(a, x), + self.config.IMAGES_PER_GPU, + names=["pre_nms_anchors"]) + + # Apply deltas to anchors to get refined anchors. + # [batch, N, (y1, x1, y2, x2)] + boxes = utils.batch_slice([pre_nms_anchors, deltas], + lambda x, y: apply_box_deltas_graph(x, y), + self.config.IMAGES_PER_GPU, + names=["refined_anchors"]) + + # Clip to image boundaries. Since we're in normalized coordinates, + # clip to 0..1 range. 
[batch, N, (y1, x1, y2, x2)] + window = np.array([0, 0, 1, 1], dtype=np.float32) + boxes = utils.batch_slice(boxes, + lambda x: clip_boxes_graph(x, window), + self.config.IMAGES_PER_GPU, + names=["refined_anchors_clipped"]) + + # Filter out small boxes + # According to Xinlei Chen's paper, this reduces detection accuracy + # for small objects, so we're skipping it. + + # Non-max suppression + def nms(boxes, scores): + indices = tf.image.non_max_suppression( + boxes, scores, self.proposal_count, + self.nms_threshold, name="rpn_non_max_suppression") + proposals = tf.gather(boxes, indices) + # Pad if needed + padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0) + proposals = tf.pad(proposals, [(0, padding), (0, 0)]) + return proposals + proposals = utils.batch_slice([boxes, scores], nms, + self.config.IMAGES_PER_GPU) + return proposals + + def compute_output_shape(self, input_shape): + return (None, self.proposal_count, 4) + + +############################################################ +# ROIAlign Layer +############################################################ + +def log2_graph(x): + """Implementation of Log2. TF doesn't have a native implementation.""" + return tf.log(x) / tf.log(2.0) + + +class PyramidROIAlign(KE.Layer): + """Implements ROI Pooling on multiple levels of the feature pyramid. + Params: + - pool_shape: [pool_height, pool_width] of the output pooled regions. Usually [7, 7] + Inputs: + - boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized + coordinates. Possibly padded with zeros if not enough + boxes to fill the array. + - image_meta: [batch, (meta data)] Image details. See compose_image_meta() + - feature_maps: List of feature maps from different levels of the pyramid. + Each is [batch, height, width, channels] + Output: + Pooled regions in the shape: [batch, num_boxes, pool_height, pool_width, channels]. + The width and height are those specific in the pool_shape in the layer + constructor. + """ + + def __init__(self, pool_shape, **kwargs): + super(PyramidROIAlign, self).__init__(**kwargs) + self.pool_shape = tuple(pool_shape) + + def call(self, inputs): + # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords + boxes = inputs[0] + + # Image meta + # Holds details about the image. See compose_image_meta() + image_meta = inputs[1] + + # Feature Maps. List of feature maps from different level of the + # feature pyramid. Each is [batch, height, width, channels] + feature_maps = inputs[2:] + + # Assign each ROI to a level in the pyramid based on the ROI area. + y1, x1, y2, x2 = tf.split(boxes, 4, axis=2) + h = y2 - y1 + w = x2 - x1 + # Use shape of first image. Images in a batch must have the same size. + image_shape = parse_image_meta_graph(image_meta)['image_shape'][0] + # Equation 1 in the Feature Pyramid Networks paper. Account for + # the fact that our coordinates are normalized here. + # e.g. a 224x224 ROI (in pixels) maps to P4 + image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32) + roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area))) + roi_level = tf.minimum(5, tf.maximum( + 2, 4 + tf.cast(tf.round(roi_level), tf.int32))) + roi_level = tf.squeeze(roi_level, 2) + + # Loop through levels and apply ROI pooling to each. P2 to P5. + pooled = [] + box_to_level = [] + for i, level in enumerate(range(2, 6)): + ix = tf.where(tf.equal(roi_level, level)) + level_boxes = tf.gather_nd(boxes, ix) + + # Box indices for crop_and_resize. 
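+            # ix[:, 0] holds the batch index of each selected box; tf.image.crop_and_resize
+            # uses it to pick the correct image in the batch for each crop.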
+ box_indices = tf.cast(ix[:, 0], tf.int32) + + # Keep track of which box is mapped to which level + box_to_level.append(ix) + + # Stop gradient propogation to ROI proposals + level_boxes = tf.stop_gradient(level_boxes) + box_indices = tf.stop_gradient(box_indices) + + # Crop and Resize + # From Mask R-CNN paper: "We sample four regular locations, so + # that we can evaluate either max or average pooling. In fact, + # interpolating only a single value at each bin center (without + # pooling) is nearly as effective." + # + # Here we use the simplified approach of a single value per bin, + # which is how it's done in tf.crop_and_resize() + # Result: [batch * num_boxes, pool_height, pool_width, channels] + pooled.append(tf.image.crop_and_resize( + feature_maps[i], level_boxes, box_indices, self.pool_shape, + method="bilinear")) + + # Pack pooled features into one tensor + pooled = tf.concat(pooled, axis=0) + + # Pack box_to_level mapping into one array and add another + # column representing the order of pooled boxes + box_to_level = tf.concat(box_to_level, axis=0) + box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1) + box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range], + axis=1) + + # Rearrange pooled features to match the order of the original boxes + # Sort box_to_level by batch then box index + # TF doesn't have a way to sort by two columns, so merge them and sort. + sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1] + ix = tf.nn.top_k(sorting_tensor, k=tf.shape( + box_to_level)[0]).indices[::-1] + ix = tf.gather(box_to_level[:, 2], ix) + pooled = tf.gather(pooled, ix) + + # Re-add the batch dimension + shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0) + pooled = tf.reshape(pooled, shape) + return pooled + + def compute_output_shape(self, input_shape): + return input_shape[0][:2] + self.pool_shape + (input_shape[2][-1], ) + + +############################################################ +# Detection Target Layer +############################################################ + +def overlaps_graph(boxes1, boxes2): + """Computes IoU overlaps between two sets of boxes. + boxes1, boxes2: [N, (y1, x1, y2, x2)]. + """ + # 1. Tile boxes2 and repeat boxes1. This allows us to compare + # every boxes1 against every boxes2 without loops. + # TF doesn't have an equivalent to np.repeat() so simulate it + # using tf.tile() and tf.reshape. + b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), + [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) + b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) + # 2. Compute intersections + b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1) + b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1) + y1 = tf.maximum(b1_y1, b2_y1) + x1 = tf.maximum(b1_x1, b2_x1) + y2 = tf.minimum(b1_y2, b2_y2) + x2 = tf.minimum(b1_x2, b2_x2) + intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0) + # 3. Compute unions + b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1) + b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1) + union = b1_area + b2_area - intersection + # 4. Compute IoU and reshape to [boxes1, boxes2] + iou = intersection / union + overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) + return overlaps + + +def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config): + """Generates detection targets for one image. Subsamples proposals and + generates target class IDs, bounding box deltas, and masks for each. 
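+    Positive ROIs (IoU >= 0.5 with some GT box) and negatives (IoU < 0.5 with every GT box)
+    are subsampled to roughly a 1:2 ratio, controlled by ROI_POSITIVE_RATIO.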
+ Inputs: + proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates. Might + be zero padded if there are not enough proposals. + gt_class_ids: [MAX_GT_INSTANCES] int class IDs + gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. + gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type. + Returns: Target ROIs and corresponding class IDs, bounding box shifts, + and masks. + rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates + class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded. + deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))] + masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox + boundaries and resized to neural network output size. + Note: Returned arrays might be zero padded if not enough target ROIs. + """ + # Assertions + asserts = [ + tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], + name="roi_assertion"), + ] + with tf.control_dependencies(asserts): + proposals = tf.identity(proposals) + + # Remove zero padding + proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") + gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") + gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, + name="trim_gt_class_ids") + gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2, + name="trim_gt_masks") + + # Handle COCO crowds + # A crowd box in COCO is a bounding box around several instances. Exclude + # them from training. A crowd box is given a negative class ID. + crowd_ix = tf.where(gt_class_ids < 0)[:, 0] + non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0] + crowd_boxes = tf.gather(gt_boxes, crowd_ix) + gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) + gt_boxes = tf.gather(gt_boxes, non_crowd_ix) + gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2) + + # Compute overlaps matrix [proposals, gt_boxes] + overlaps = overlaps_graph(proposals, gt_boxes) + + # Compute overlaps with crowd boxes [proposals, crowd_boxes] + crowd_overlaps = overlaps_graph(proposals, crowd_boxes) + crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) + no_crowd_bool = (crowd_iou_max < 0.001) + + # Determine positive and negative ROIs + roi_iou_max = tf.reduce_max(overlaps, axis=1) + # 1. Positive ROIs are those with >= 0.5 IoU with a GT box + positive_roi_bool = (roi_iou_max >= 0.5) + positive_indices = tf.where(positive_roi_bool)[:, 0] + # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds. + negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0] + + # Subsample ROIs. Aim for 33% positive + # Positive ROIs + positive_count = int(config.TRAIN_ROIS_PER_IMAGE * + config.ROI_POSITIVE_RATIO) + positive_indices = tf.random_shuffle(positive_indices)[:positive_count] + positive_count = tf.shape(positive_indices)[0] + # Negative ROIs. Add enough to maintain positive:negative ratio. + r = 1.0 / config.ROI_POSITIVE_RATIO + negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count + negative_indices = tf.random_shuffle(negative_indices)[:negative_count] + # Gather selected ROIs + positive_rois = tf.gather(proposals, positive_indices) + negative_rois = tf.gather(proposals, negative_indices) + + # Assign positive ROIs to GT boxes. 
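+    # Each positive ROI is matched to the GT box it overlaps most; the tf.cond below
+    # guards the degenerate case of an image with no ground-truth boxes.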
+ positive_overlaps = tf.gather(overlaps, positive_indices) + roi_gt_box_assignment = tf.cond( + tf.greater(tf.shape(positive_overlaps)[1], 0), + true_fn = lambda: tf.argmax(positive_overlaps, axis=1), + false_fn = lambda: tf.cast(tf.constant([]),tf.int64) + ) + roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) + roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) + + # Compute bbox refinement for positive ROIs + deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes) + deltas /= config.BBOX_STD_DEV + + # Assign positive ROIs to GT masks + # Permute masks to [N, height, width, 1] + transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1) + # Pick the right mask for each ROI + roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment) + + # Compute mask targets + boxes = positive_rois + if config.USE_MINI_MASK: + # Transform ROI coordinates from normalized image space + # to normalized mini-mask space. + y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1) + gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1) + gt_h = gt_y2 - gt_y1 + gt_w = gt_x2 - gt_x1 + y1 = (y1 - gt_y1) / gt_h + x1 = (x1 - gt_x1) / gt_w + y2 = (y2 - gt_y1) / gt_h + x2 = (x2 - gt_x1) / gt_w + boxes = tf.concat([y1, x1, y2, x2], 1) + box_ids = tf.range(0, tf.shape(roi_masks)[0]) + masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes, + box_ids, + config.MASK_SHAPE) + # Remove the extra dimension from masks. + masks = tf.squeeze(masks, axis=3) + + # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with + # binary cross entropy loss. + masks = tf.round(masks) + + # Append negative ROIs and pad bbox deltas and masks that + # are not used for negative ROIs with zeros. + rois = tf.concat([positive_rois, negative_rois], axis=0) + N = tf.shape(negative_rois)[0] + P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) + rois = tf.pad(rois, [(0, P), (0, 0)]) + roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) + roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) + deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) + masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)]) + + return rois, roi_gt_class_ids, deltas, masks + + +class DetectionTargetLayer(KE.Layer): + """Subsamples proposals and generates target box refinement, class_ids, + and masks for each. + Inputs: + proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might + be zero padded if there are not enough proposals. + gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs. + gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized + coordinates. + gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type + Returns: Target ROIs and corresponding class IDs, bounding box shifts, + and masks. + rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized + coordinates + target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs. + target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)] + target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width] + Masks cropped to bbox boundaries and resized to neural + network output size. + Note: Returned arrays might be zero padded if not enough target ROIs. 
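+    This layer is only wired into the training graph; at inference time the DetectionLayer
+    below refines the classifier outputs instead.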
+ """ + + def __init__(self, config, **kwargs): + super(DetectionTargetLayer, self).__init__(**kwargs) + self.config = config + + def call(self, inputs): + proposals = inputs[0] + gt_class_ids = inputs[1] + gt_boxes = inputs[2] + gt_masks = inputs[3] + + # Slice the batch and run a graph for each slice + # TODO: Rename target_bbox to target_deltas for clarity + names = ["rois", "target_class_ids", "target_bbox", "target_mask"] + outputs = utils.batch_slice( + [proposals, gt_class_ids, gt_boxes, gt_masks], + lambda w, x, y, z: detection_targets_graph( + w, x, y, z, self.config), + self.config.IMAGES_PER_GPU, names=names) + return outputs + + def compute_output_shape(self, input_shape): + return [ + (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois + (None, self.config.TRAIN_ROIS_PER_IMAGE), # class_ids + (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas + (None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0], + self.config.MASK_SHAPE[1]) # masks + ] + + def compute_mask(self, inputs, mask=None): + return [None, None, None, None] + + +############################################################ +# Detection Layer +############################################################ + +def refine_detections_graph(rois, probs, deltas, window, config): + """Refine classified proposals and filter overlaps and return final + detections. + Inputs: + rois: [N, (y1, x1, y2, x2)] in normalized coordinates + probs: [N, num_classes]. Class probabilities. + deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific + bounding box deltas. + window: (y1, x1, y2, x2) in normalized coordinates. The part of the image + that contains the image excluding the padding. + Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where + coordinates are normalized. + """ + # Class IDs per ROI + class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) + # Class probability of the top class of each ROI + indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) + class_scores = tf.gather_nd(probs, indices) + # Class-specific bounding box deltas + deltas_specific = tf.gather_nd(deltas, indices) + # Apply bounding box deltas + # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates + refined_rois = apply_box_deltas_graph( + rois, deltas_specific * config.BBOX_STD_DEV) + # Clip boxes to image window + refined_rois = clip_boxes_graph(refined_rois, window) + + # TODO: Filter out boxes with zero area + + # Filter out background boxes + keep = tf.where(class_ids > 0)[:, 0] + # Filter out low confidence boxes + if config.DETECTION_MIN_CONFIDENCE: + conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] + keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), + tf.expand_dims(conf_keep, 0)) + keep = tf.sparse_tensor_to_dense(keep)[0] + + # Apply per-class NMS + # 1. 
Prepare variables + pre_nms_class_ids = tf.gather(class_ids, keep) + pre_nms_scores = tf.gather(class_scores, keep) + pre_nms_rois = tf.gather(refined_rois, keep) + unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] + + def nms_keep_map(class_id): + """Apply Non-Maximum Suppression on ROIs of the given class.""" + # Indices of ROIs of the given class + ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] + # Apply NMS + class_keep = tf.image.non_max_suppression( + tf.gather(pre_nms_rois, ixs), + tf.gather(pre_nms_scores, ixs), + max_output_size=config.DETECTION_MAX_INSTANCES, + iou_threshold=config.DETECTION_NMS_THRESHOLD) + # Map indices + class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) + # Pad with -1 so returned tensors have the same shape + gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] + class_keep = tf.pad(class_keep, [(0, gap)], + mode='CONSTANT', constant_values=-1) + # Set shape so map_fn() can infer result shape + class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) + return class_keep + + # 2. Map over class IDs + nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, + dtype=tf.int64) + # 3. Merge results into one list, and remove -1 padding + nms_keep = tf.reshape(nms_keep, [-1]) + nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) + # 4. Compute intersection between keep and nms_keep + keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), + tf.expand_dims(nms_keep, 0)) + keep = tf.sparse_tensor_to_dense(keep)[0] + # Keep top detections + roi_count = config.DETECTION_MAX_INSTANCES + class_scores_keep = tf.gather(class_scores, keep) + num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) + top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] + keep = tf.gather(keep, top_ids) + + # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] + # Coordinates are normalized. + detections = tf.concat([ + tf.gather(refined_rois, keep), + tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], + tf.gather(class_scores, keep)[..., tf.newaxis] + ], axis=1) + + # Pad with zeros if detections < DETECTION_MAX_INSTANCES + gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] + detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT") + return detections + + +class DetectionLayer(KE.Layer): + """Takes classified proposal boxes and their bounding box deltas and + returns the final detection boxes. + Returns: + [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where + coordinates are normalized. + """ + + def __init__(self, config=None, **kwargs): + super(DetectionLayer, self).__init__(**kwargs) + self.config = config + + def call(self, inputs): + rois = inputs[0] + mrcnn_class = inputs[1] + mrcnn_bbox = inputs[2] + image_meta = inputs[3] + + # Get windows of images in normalized coordinates. Windows are the area + # in the image that excludes the padding. + # Use the shape of the first image in the batch to normalize the window + # because we know that all images get resized to the same size. 
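+        # The window is converted to normalized coordinates so it can be compared
+        # directly against the normalized ROIs when clipping detections.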
+ m = parse_image_meta_graph(image_meta) + image_shape = m['image_shape'][0] + window = norm_boxes_graph(m['window'], image_shape[:2]) + + # Run detection refinement graph on each item in the batch + detections_batch = utils.batch_slice( + [rois, mrcnn_class, mrcnn_bbox, window], + lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config), + self.config.IMAGES_PER_GPU) + + # Reshape output + # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in + # normalized coordinates + return tf.reshape( + detections_batch, + [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6]) + + def compute_output_shape(self, input_shape): + return (None, self.config.DETECTION_MAX_INSTANCES, 6) + + +############################################################ +# Region Proposal Network (RPN) +############################################################ + +def rpn_graph(feature_map, anchors_per_location, anchor_stride): + """Builds the computation graph of Region Proposal Network. + feature_map: backbone features [batch, height, width, depth] + anchors_per_location: number of anchors per pixel in the feature map + anchor_stride: Controls the density of anchors. Typically 1 (anchors for + every pixel in the feature map), or 2 (every other pixel). + Returns: + rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) + rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. + rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be + applied to anchors. + """ + # TODO: check if stride of 2 causes alignment issues if the feature map + # is not even. + # Shared convolutional base of the RPN + shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu', + strides=anchor_stride, + name='rpn_conv_shared')(feature_map) + + # Anchor Score. [batch, height, width, anchors per location * 2]. + x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid', + activation='linear', name='rpn_class_raw')(shared) + + # Reshape to [batch, anchors, 2] + rpn_class_logits = KL.Lambda( + lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(x) + + # Softmax on last dimension of BG/FG. + rpn_probs = KL.Activation( + "softmax", name="rpn_class_xxx")(rpn_class_logits) + + # Bounding box refinement. [batch, H, W, anchors per location * depth] + # where depth is [x, y, log(w), log(h)] + x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid", + activation='linear', name='rpn_bbox_pred')(shared) + + # Reshape to [batch, anchors, 4] + rpn_bbox = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(x) + + return [rpn_class_logits, rpn_probs, rpn_bbox] + + +def build_rpn_model(anchor_stride, anchors_per_location, depth): + """Builds a Keras model of the Region Proposal Network. + It wraps the RPN graph so it can be used multiple times with shared + weights. + anchors_per_location: number of anchors per pixel in the feature map + anchor_stride: Controls the density of anchors. Typically 1 (anchors for + every pixel in the feature map), or 2 (every other pixel). + depth: Depth of the backbone feature map. + Returns a Keras Model object. The model outputs, when called, are: + rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) + rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. + rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be + applied to anchors. 
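+    Example (illustrative): rpn = build_rpn_model(anchor_stride=1, anchors_per_location=3, depth=256);
+    calling rpn([P2]) on one pyramid level returns [rpn_class_logits, rpn_probs, rpn_bbox] for that level.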
+ """ + input_feature_map = KL.Input(shape=[None, None, depth], + name="input_rpn_feature_map") + outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride) + return KM.Model([input_feature_map], outputs, name="rpn_model") + + +############################################################ +# Feature Pyramid Network Heads +############################################################ + +def fpn_classifier_graph(rois, feature_maps, image_meta, + pool_size, num_classes, train_bn=True, + fc_layers_size=1024): + """Builds the computation graph of the feature pyramid network classifier + and regressor heads. + rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized + coordinates. + feature_maps: List of feature maps from different layers of the pyramid, + [P2, P3, P4, P5]. Each has a different resolution. + image_meta: [batch, (meta data)] Image details. See compose_image_meta() + pool_size: The width of the square feature map generated from ROI Pooling. + num_classes: number of classes, which determines the depth of the results + train_bn: Boolean. Train or freeze Batch Norm layers + fc_layers_size: Size of the 2 FC layers + Returns: + logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax) + probs: [batch, num_rois, NUM_CLASSES] classifier probabilities + bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to + proposal boxes + """ + # ROI Pooling + # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] + x = PyramidROIAlign([pool_size, pool_size], + name="roi_align_classifier")([rois, image_meta] + feature_maps) + # Two 1024 FC layers (implemented with Conv2D for consistency) + x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"), + name="mrcnn_class_conv1")(x) + x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn1')(x, training=train_bn) + x = KL.Activation('relu')(x) + x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)), + name="mrcnn_class_conv2")(x) + x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn2')(x, training=train_bn) + x = KL.Activation('relu')(x) + + shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2), + name="pool_squeeze")(x) + + # Classifier head + mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes), + name='mrcnn_class_logits')(shared) + mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"), + name="mrcnn_class")(mrcnn_class_logits) + + # BBox head + # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))] + x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'), + name='mrcnn_bbox_fc')(shared) + # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] + s = K.int_shape(x) + mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x) + + return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox + + +def build_fpn_mask_graph(rois, feature_maps, image_meta, + pool_size, num_classes, train_bn=True): + """Builds the computation graph of the mask head of Feature Pyramid Network. + rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized + coordinates. + feature_maps: List of feature maps from different layers of the pyramid, + [P2, P3, P4, P5]. Each has a different resolution. + image_meta: [batch, (meta data)] Image details. See compose_image_meta() + pool_size: The width of the square feature map generated from ROI Pooling. + num_classes: number of classes, which determines the depth of the results + train_bn: Boolean. 
Train or freeze Batch Norm layers + Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES] + """ + # ROI Pooling + # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] + x = PyramidROIAlign([pool_size, pool_size], + name="roi_align_mask")([rois, image_meta] + feature_maps) + + # Conv layers + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv1")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn1')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv2")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn2')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv3")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn3')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv4")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn4')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"), + name="mrcnn_mask_deconv")(x) + x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"), + name="mrcnn_mask")(x) + return x + + +############################################################ +# Loss Functions +############################################################ + +def smooth_l1_loss(y_true, y_pred): + """Implements Smooth-L1 loss. + y_true and y_pred are typically: [N, 4], but could be any shape. + """ + diff = K.abs(y_true - y_pred) + less_than_one = K.cast(K.less(diff, 1.0), "float32") + loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) + return loss + + +def rpn_class_loss_graph(rpn_match, rpn_class_logits): + """RPN anchor classifier loss. + rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, + -1=negative, 0=neutral anchor. + rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG. + """ + # Squeeze last dim to simplify + rpn_match = tf.squeeze(rpn_match, -1) + # Get anchor classes. Convert the -1/+1 match to 0/1 values. + anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) + # Positive and Negative anchors contribute to the loss, + # but neutral anchors (match value = 0) don't. + indices = tf.where(K.not_equal(rpn_match, 0)) + # Pick rows that contribute to the loss and filter out the rest. + rpn_class_logits = tf.gather_nd(rpn_class_logits, indices) + anchor_class = tf.gather_nd(anchor_class, indices) + # Cross entropy loss + loss = K.sparse_categorical_crossentropy(target=anchor_class, + output=rpn_class_logits, + from_logits=True) + loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) + return loss + + +def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox): + """Return the RPN bounding box loss graph. + config: the model config object. + target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))]. + Uses 0 padding to fill in unsed bbox deltas. + rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, + -1=negative, 0=neutral anchor. + rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] + """ + # Positive anchors contribute to the loss, but negative and + # neutral anchors (match value of 0 or -1) don't. 
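+    # Only anchors with rpn_match == 1 are gathered below; batch_pack_graph then trims the
+    # zero-padded target deltas so each image contributes the same number of rows as its positives.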
+ rpn_match = K.squeeze(rpn_match, -1) + indices = tf.where(K.equal(rpn_match, 1)) + + # Pick bbox deltas that contribute to the loss + rpn_bbox = tf.gather_nd(rpn_bbox, indices) + + # Trim target bounding box deltas to the same length as rpn_bbox. + batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1) + target_bbox = batch_pack_graph(target_bbox, batch_counts, + config.IMAGES_PER_GPU) + + loss = smooth_l1_loss(target_bbox, rpn_bbox) + + loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) + return loss + + +def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, + active_class_ids): + """Loss for the classifier head of Mask RCNN. + target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero + padding to fill in the array. + pred_class_logits: [batch, num_rois, num_classes] + active_class_ids: [batch, num_classes]. Has a value of 1 for + classes that are in the dataset of the image, and 0 + for classes that are not in the dataset. + """ + # During model building, Keras calls this function with + # target_class_ids of type float32. Unclear why. Cast it + # to int to get around it. + target_class_ids = tf.cast(target_class_ids, 'int64') + + # Find predictions of classes that are not in the dataset. + pred_class_ids = tf.argmax(pred_class_logits, axis=2) + # TODO: Update this line to work with batch > 1. Right now it assumes all + # images in a batch have the same active_class_ids + pred_active = tf.gather(active_class_ids[0], pred_class_ids) + + # Loss + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=target_class_ids, logits=pred_class_logits) + + # Erase losses of predictions of classes that are not in the active + # classes of the image. + loss = loss * pred_active + + # Computer loss mean. Use only predictions that contribute + # to the loss to get a correct mean. + loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active) + return loss + + +def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): + """Loss for Mask R-CNN bounding box refinement. + target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))] + target_class_ids: [batch, num_rois]. Integer class IDs. + pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))] + """ + # Reshape to merge batch and roi dimensions for simplicity. + target_class_ids = K.reshape(target_class_ids, (-1,)) + target_bbox = K.reshape(target_bbox, (-1, 4)) + pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) + + # Only positive ROIs contribute to the loss. And only + # the right class_id of each ROI. Get their indices. + positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] + positive_roi_class_ids = tf.cast( + tf.gather(target_class_ids, positive_roi_ix), tf.int64) + indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) + + # Gather the deltas (predicted and true) that contribute to loss + target_bbox = tf.gather(target_bbox, positive_roi_ix) + pred_bbox = tf.gather_nd(pred_bbox, indices) + + # Smooth-L1 Loss + loss = K.switch(tf.size(target_bbox) > 0, + smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), + tf.constant(0.0)) + loss = K.mean(loss) + return loss + + +def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks): + """Mask binary cross-entropy loss for the masks head. + target_masks: [batch, num_rois, height, width]. + A float32 tensor of values 0 or 1. Uses zero padding to fill array. + target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded. 
+ pred_masks: [batch, proposals, height, width, num_classes] float32 tensor + with values from 0 to 1. + """ + # Reshape for simplicity. Merge first two dimensions into one. + target_class_ids = K.reshape(target_class_ids, (-1,)) + mask_shape = tf.shape(target_masks) + target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3])) + pred_shape = tf.shape(pred_masks) + pred_masks = K.reshape(pred_masks, + (-1, pred_shape[2], pred_shape[3], pred_shape[4])) + # Permute predicted masks to [N, num_classes, height, width] + pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2]) + + # Only positive ROIs contribute to the loss. And only + # the class specific mask of each ROI. + positive_ix = tf.where(target_class_ids > 0)[:, 0] + positive_class_ids = tf.cast( + tf.gather(target_class_ids, positive_ix), tf.int64) + indices = tf.stack([positive_ix, positive_class_ids], axis=1) + + # Gather the masks (predicted and true) that contribute to loss + y_true = tf.gather(target_masks, positive_ix) + y_pred = tf.gather_nd(pred_masks, indices) + + # Compute binary cross entropy. If no positive ROIs, then return 0. + # shape: [batch, roi, num_classes] + loss = K.switch(tf.size(y_true) > 0, + K.binary_crossentropy(target=y_true, output=y_pred), + tf.constant(0.0)) + loss = K.mean(loss) + return loss + + +############################################################ +# Data Generator +############################################################ + +def load_image_gt(dataset, config, image_id, augment=False, augmentation=None, + use_mini_mask=False): + """Load and return ground truth data for an image (image, mask, bounding boxes). + augment: (deprecated. Use augmentation instead). If true, apply random + image augmentation. Currently, only horizontal flipping is offered. + augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. + For example, passing imgaug.augmenters.Fliplr(0.5) flips images + right/left 50% of the time. + use_mini_mask: If False, returns full-size masks that are the same height + and width as the original image. These can be big, for example + 1024x1024x100 (for 100 instances). Mini masks are smaller, typically, + 224x224 and are generated by extracting the bounding box of the + object and resizing it to MINI_MASK_SHAPE. + Returns: + image: [height, width, 3] + shape: the original shape of the image before resizing and cropping. + class_ids: [instance_count] Integer class IDs + bbox: [instance_count, (y1, x1, y2, x2)] + mask: [height, width, instance_count]. The height and width are those + of the image unless use_mini_mask is True, in which case they are + defined in MINI_MASK_SHAPE. + """ + # Load image and mask + image = dataset.load_image(image_id) + mask, class_ids = dataset.load_mask(image_id) + original_shape = image.shape + image, window, scale, padding, crop = utils.resize_image( + image, + min_dim=config.IMAGE_MIN_DIM, + min_scale=config.IMAGE_MIN_SCALE, + max_dim=config.IMAGE_MAX_DIM, + mode=config.IMAGE_RESIZE_MODE) + mask = utils.resize_mask(mask, scale, padding, crop) + + # Random horizontal flips. + # TODO: will be removed in a future update in favor of augmentation + if augment: + logging.warning("'augment' is deprecated. 
Use 'augmentation' instead.") + if random.randint(0, 1): + image = np.fliplr(image) + mask = np.fliplr(mask) + + # Augmentation + # This requires the imgaug lib (https://github.com/aleju/imgaug) + if augmentation: + import imgaug + + # Augmenters that are safe to apply to masks + # Some, such as Affine, have settings that make them unsafe, so always + # test your augmentation on masks + MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes", + "Fliplr", "Flipud", "CropAndPad", + "Affine", "PiecewiseAffine"] + + def hook(images, augmenter, parents, default): + """Determines which augmenters to apply to masks.""" + return augmenter.__class__.__name__ in MASK_AUGMENTERS + + # Store shapes before augmentation to compare + image_shape = image.shape + mask_shape = mask.shape + # Make augmenters deterministic to apply similarly to images and masks + det = augmentation.to_deterministic() + image = det.augment_image(image) + # Change mask to np.uint8 because imgaug doesn't support np.bool + mask = det.augment_image(mask.astype(np.uint8), + hooks=imgaug.HooksImages(activator=hook)) + # Verify that shapes didn't change + assert image.shape == image_shape, "Augmentation shouldn't change image size" + assert mask.shape == mask_shape, "Augmentation shouldn't change mask size" + # Change mask back to bool + mask = mask.astype(np.bool) + + # Note that some boxes might be all zeros if the corresponding mask got cropped out. + # and here is to filter them out + _idx = np.sum(mask, axis=(0, 1)) > 0 + mask = mask[:, :, _idx] + class_ids = class_ids[_idx] + # Bounding boxes. Note that some boxes might be all zeros + # if the corresponding mask got cropped out. + # bbox: [num_instances, (y1, x1, y2, x2)] + bbox = utils.extract_bboxes(mask) + + # Active classes + # Different datasets have different classes, so track the + # classes supported in the dataset of this image. + active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32) + source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] + active_class_ids[source_class_ids] = 1 + + # Resize masks to smaller size to reduce memory usage + if use_mini_mask: + mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) + + # Image meta data + image_meta = compose_image_meta(image_id, original_shape, image.shape, + window, scale, active_class_ids) + + return image, image_meta, class_ids, bbox, mask + + +def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks, config): + """Generate targets for training Stage 2 classifier and mask heads. + This is not used in normal training. It's useful for debugging or to train + the Mask RCNN heads without using the RPN head. + Inputs: + rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes. + gt_class_ids: [instance count] Integer class IDs + gt_boxes: [instance count, (y1, x1, y2, x2)] + gt_masks: [height, width, instance count] Ground truth masks. Can be full + size or mini-masks. + Returns: + rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] + class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. + bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific + bbox refinements. + masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific masks cropped + to bbox boundaries and resized to neural network output size. 
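+    For illustration, with typical settings such as TRAIN_ROIS_PER_IMAGE=200,
+    MASK_SHAPE=[28, 28] and NUM_CLASSES=81, the returned arrays have shapes
+    [200, 4], [200], [200, 81, 4] and [200, 28, 28, 81] respectively.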
+ """ + assert rpn_rois.shape[0] > 0 + assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format( + gt_class_ids.dtype) + assert gt_boxes.dtype == np.int32, "Expected int but got {}".format( + gt_boxes.dtype) + assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format( + gt_masks.dtype) + + # It's common to add GT Boxes to ROIs but we don't do that here because + # according to XinLei Chen's paper, it doesn't help. + + # Trim empty padding in gt_boxes and gt_masks parts + instance_ids = np.where(gt_class_ids > 0)[0] + assert instance_ids.shape[0] > 0, "Image must contain instances." + gt_class_ids = gt_class_ids[instance_ids] + gt_boxes = gt_boxes[instance_ids] + gt_masks = gt_masks[:, :, instance_ids] + + # Compute areas of ROIs and ground truth boxes. + rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * \ + (rpn_rois[:, 3] - rpn_rois[:, 1]) + gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * \ + (gt_boxes[:, 3] - gt_boxes[:, 1]) + + # Compute overlaps [rpn_rois, gt_boxes] + overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0])) + for i in range(overlaps.shape[1]): + gt = gt_boxes[i] + overlaps[:, i] = utils.compute_iou( + gt, rpn_rois, gt_box_area[i], rpn_roi_area) + + # Assign ROIs to GT boxes + rpn_roi_iou_argmax = np.argmax(overlaps, axis=1) + rpn_roi_iou_max = overlaps[np.arange( + overlaps.shape[0]), rpn_roi_iou_argmax] + # GT box assigned to each ROI + rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax] + rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax] + + # Positive ROIs are those with >= 0.5 IoU with a GT box. + fg_ids = np.where(rpn_roi_iou_max > 0.5)[0] + + # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining) + # TODO: To hard example mine or not to hard example mine, that's the question + # bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0] + bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] + + # Subsample ROIs. Aim for 33% foreground. + # FG + fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) + if fg_ids.shape[0] > fg_roi_count: + keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False) + else: + keep_fg_ids = fg_ids + # BG + remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0] + if bg_ids.shape[0] > remaining: + keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) + else: + keep_bg_ids = bg_ids + # Combine indices of ROIs to keep + keep = np.concatenate([keep_fg_ids, keep_bg_ids]) + # Need more? + remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0] + if remaining > 0: + # Looks like we don't have enough samples to maintain the desired + # balance. Reduce requirements and fill in the rest. This is + # likely different from the Mask RCNN paper. + + # There is a small chance we have neither fg nor bg samples. + if keep.shape[0] == 0: + # Pick bg regions with easier IoU threshold + bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] + assert bg_ids.shape[0] >= remaining + keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) + assert keep_bg_ids.shape[0] == remaining + keep = np.concatenate([keep, keep_bg_ids]) + else: + # Fill the rest with repeated bg rois. + keep_extra_ids = np.random.choice( + keep_bg_ids, remaining, replace=True) + keep = np.concatenate([keep, keep_extra_ids]) + assert keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE, \ + "keep doesn't match ROI batch size {}, {}".format( + keep.shape[0], config.TRAIN_ROIS_PER_IMAGE) + + # Reset the gt boxes assigned to BG ROIs. 
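+    # Background ROIs keep class id 0 and an all-zero box target, so they feed
+    # the classifier target only; the bbox and mask targets built further down
+    # are filled solely from the positive ROIs (pos_ids).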
+ rpn_roi_gt_boxes[keep_bg_ids, :] = 0 + rpn_roi_gt_class_ids[keep_bg_ids] = 0 + + # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement. + rois = rpn_rois[keep] + roi_gt_boxes = rpn_roi_gt_boxes[keep] + roi_gt_class_ids = rpn_roi_gt_class_ids[keep] + roi_gt_assignment = rpn_roi_iou_argmax[keep] + + # Class-aware bbox deltas. [y, x, log(h), log(w)] + bboxes = np.zeros((config.TRAIN_ROIS_PER_IMAGE, + config.NUM_CLASSES, 4), dtype=np.float32) + pos_ids = np.where(roi_gt_class_ids > 0)[0] + bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement( + rois[pos_ids], roi_gt_boxes[pos_ids, :4]) + # Normalize bbox refinements + bboxes /= config.BBOX_STD_DEV + + # Generate class-specific target masks + masks = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0], config.MASK_SHAPE[1], config.NUM_CLASSES), + dtype=np.float32) + for i in pos_ids: + class_id = roi_gt_class_ids[i] + assert class_id > 0, "class id must be greater than 0" + gt_id = roi_gt_assignment[i] + class_mask = gt_masks[:, :, gt_id] + + if config.USE_MINI_MASK: + # Create a mask placeholder, the size of the image + placeholder = np.zeros(config.IMAGE_SHAPE[:2], dtype=bool) + # GT box + gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id] + gt_w = gt_x2 - gt_x1 + gt_h = gt_y2 - gt_y1 + # Resize mini mask to size of GT box + placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \ + np.round(utils.resize(class_mask, (gt_h, gt_w))).astype(bool) + # Place the mini batch in the placeholder + class_mask = placeholder + + # Pick part of the mask and resize it + y1, x1, y2, x2 = rois[i].astype(np.int32) + m = class_mask[y1:y2, x1:x2] + mask = utils.resize(m, config.MASK_SHAPE) + masks[i, :, :, class_id] = mask + + return rois, roi_gt_class_ids, bboxes, masks + + +def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config): + """Given the anchors and GT boxes, compute overlaps and identify positive + anchors and deltas to refine them to match their corresponding GT boxes. + anchors: [num_anchors, (y1, x1, y2, x2)] + gt_class_ids: [num_gt_boxes] Integer class IDs. + gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)] + Returns: + rpn_match: [N] (int32) matches between anchors and GT boxes. + 1 = positive anchor, -1 = negative anchor, 0 = neutral + rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. + """ + # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral + rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32) + # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))] + rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4)) + + # Handle COCO crowds + # A crowd box in COCO is a bounding box around several instances. Exclude + # them from training. A crowd box is given a negative class ID. 
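+    # For example, if gt_class_ids were [12, -1, 5], index 1 would be treated
+    # as a COCO crowd box: it is dropped from the targets below, and anchors
+    # that overlap it are simply not marked as negatives.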
+ crowd_ix = np.where(gt_class_ids < 0)[0] + if crowd_ix.shape[0] > 0: + # Filter out crowds from ground truth class IDs and boxes + non_crowd_ix = np.where(gt_class_ids > 0)[0] + crowd_boxes = gt_boxes[crowd_ix] + gt_class_ids = gt_class_ids[non_crowd_ix] + gt_boxes = gt_boxes[non_crowd_ix] + # Compute overlaps with crowd boxes [anchors, crowds] + crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes) + crowd_iou_max = np.amax(crowd_overlaps, axis=1) + no_crowd_bool = (crowd_iou_max < 0.001) + else: + # All anchors don't intersect a crowd + no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool) + + # Compute overlaps [num_anchors, num_gt_boxes] + overlaps = utils.compute_overlaps(anchors, gt_boxes) + + # Match anchors to GT Boxes + # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive. + # If an anchor overlaps a GT box with IoU < 0.3 then it's negative. + # Neutral anchors are those that don't match the conditions above, + # and they don't influence the loss function. + # However, don't keep any GT box unmatched (rare, but happens). Instead, + # match it to the closest anchor (even if its max IoU is < 0.3). + # + # 1. Set negative anchors first. They get overwritten below if a GT box is + # matched to them. Skip boxes in crowd areas. + anchor_iou_argmax = np.argmax(overlaps, axis=1) + anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax] + rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 + # 2. Set an anchor for each GT box (regardless of IoU value). + # If multiple anchors have the same IoU match all of them + gt_iou_argmax = np.argwhere(overlaps == np.max(overlaps, axis=0))[:,0] + rpn_match[gt_iou_argmax] = 1 + # 3. Set anchors with high overlap as positive. + rpn_match[anchor_iou_max >= 0.7] = 1 + + # Subsample to balance positive and negative anchors + # Don't let positives be more than half the anchors + ids = np.where(rpn_match == 1)[0] + extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) + if extra > 0: + # Reset the extra ones to neutral + ids = np.random.choice(ids, extra, replace=False) + rpn_match[ids] = 0 + # Same for negative proposals + ids = np.where(rpn_match == -1)[0] + extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE - + np.sum(rpn_match == 1)) + if extra > 0: + # Rest the extra ones to neutral + ids = np.random.choice(ids, extra, replace=False) + rpn_match[ids] = 0 + + # For positive anchors, compute shift and scale needed to transform them + # to match the corresponding GT boxes. + ids = np.where(rpn_match == 1)[0] + ix = 0 # index into rpn_bbox + # TODO: use box_refinement() rather than duplicating the code here + for i, a in zip(ids, anchors[ids]): + # Closest gt box (it might have IoU < 0.7) + gt = gt_boxes[anchor_iou_argmax[i]] + + # Convert coordinates to center plus width/height. + # GT Box + gt_h = gt[2] - gt[0] + gt_w = gt[3] - gt[1] + gt_center_y = gt[0] + 0.5 * gt_h + gt_center_x = gt[1] + 0.5 * gt_w + # Anchor + a_h = a[2] - a[0] + a_w = a[3] - a[1] + a_center_y = a[0] + 0.5 * a_h + a_center_x = a[1] + 0.5 * a_w + + # Compute the bbox refinement that the RPN should predict. + rpn_bbox[ix] = [ + (gt_center_y - a_center_y) / a_h, + (gt_center_x - a_center_x) / a_w, + np.log(gt_h / a_h), + np.log(gt_w / a_w), + ] + # Normalize + rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV + ix += 1 + + return rpn_match, rpn_bbox + + +def generate_random_rois(image_shape, count, gt_class_ids, gt_boxes): + """Generates ROI proposals similar to what a region proposal network + would generate. 
+ image_shape: [Height, Width, Depth] + count: Number of ROIs to generate + gt_class_ids: [N] Integer ground truth class IDs + gt_boxes: [N, (y1, x1, y2, x2)] Ground truth boxes in pixels. + Returns: [count, (y1, x1, y2, x2)] ROI boxes in pixels. + """ + # placeholder + rois = np.zeros((count, 4), dtype=np.int32) + + # Generate random ROIs around GT boxes (90% of count) + rois_per_box = int(0.9 * count / gt_boxes.shape[0]) + for i in range(gt_boxes.shape[0]): + gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[i] + h = gt_y2 - gt_y1 + w = gt_x2 - gt_x1 + # random boundaries + r_y1 = max(gt_y1 - h, 0) + r_y2 = min(gt_y2 + h, image_shape[0]) + r_x1 = max(gt_x1 - w, 0) + r_x2 = min(gt_x2 + w, image_shape[1]) + + # To avoid generating boxes with zero area, we generate double what + # we need and filter out the extra. If we get fewer valid boxes + # than we need, we loop and try again. + while True: + y1y2 = np.random.randint(r_y1, r_y2, (rois_per_box * 2, 2)) + x1x2 = np.random.randint(r_x1, r_x2, (rois_per_box * 2, 2)) + # Filter out zero area boxes + threshold = 1 + y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= + threshold][:rois_per_box] + x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= + threshold][:rois_per_box] + if y1y2.shape[0] == rois_per_box and x1x2.shape[0] == rois_per_box: + break + + # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape + # into x1, y1, x2, y2 order + x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) + y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) + box_rois = np.hstack([y1, x1, y2, x2]) + rois[rois_per_box * i:rois_per_box * (i + 1)] = box_rois + + # Generate random ROIs anywhere in the image (10% of count) + remaining_count = count - (rois_per_box * gt_boxes.shape[0]) + # To avoid generating boxes with zero area, we generate double what + # we need and filter out the extra. If we get fewer valid boxes + # than we need, we loop and try again. + while True: + y1y2 = np.random.randint(0, image_shape[0], (remaining_count * 2, 2)) + x1x2 = np.random.randint(0, image_shape[1], (remaining_count * 2, 2)) + # Filter out zero area boxes + threshold = 1 + y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= + threshold][:remaining_count] + x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= + threshold][:remaining_count] + if y1y2.shape[0] == remaining_count and x1x2.shape[0] == remaining_count: + break + + # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape + # into x1, y1, x2, y2 order + x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) + y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) + global_rois = np.hstack([y1, x1, y2, x2]) + rois[-remaining_count:] = global_rois + return rois + + +def data_generator(dataset, config, shuffle=True, augment=False, augmentation=None, + random_rois=0, batch_size=1, detection_targets=False, + no_augmentation_sources=None): + """A generator that returns images and corresponding target class ids, + bounding box deltas, and masks. + dataset: The Dataset object to pick data from + config: The model config object + shuffle: If True, shuffles the samples before every epoch + augment: (deprecated. Use augmentation instead). If true, apply random + image augmentation. Currently, only horizontal flipping is offered. + augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. + For example, passing imgaug.augmenters.Fliplr(0.5) flips images + right/left 50% of the time. + random_rois: If > 0 then generate proposals to be used to train the + network classifier and mask heads. 
Useful if training + the Mask RCNN part without the RPN. + batch_size: How many images to return in each call + detection_targets: If True, generate detection targets (class IDs, bbox + deltas, and masks). Typically for debugging or visualizations because + in trainig detection targets are generated by DetectionTargetLayer. + no_augmentation_sources: Optional. List of sources to exclude for + augmentation. A source is string that identifies a dataset and is + defined in the Dataset class. + Returns a Python generator. Upon calling next() on it, the + generator returns two lists, inputs and outputs. The contents + of the lists differs depending on the received arguments: + inputs list: + - images: [batch, H, W, C] + - image_meta: [batch, (meta data)] Image details. See compose_image_meta() + - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral) + - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. + - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs + - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] + - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width + are those of the image unless use_mini_mask is True, in which + case they are defined in MINI_MASK_SHAPE. + outputs list: Usually empty in regular training. But if detection_targets + is True then the outputs list contains target class_ids, bbox deltas, + and masks. + """ + b = 0 # batch item index + image_index = -1 + image_ids = np.copy(dataset.image_ids) + error_count = 0 + no_augmentation_sources = no_augmentation_sources or [] + + # Anchors + # [anchor_count, (y1, x1, y2, x2)] + backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE) + anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, + config.RPN_ANCHOR_RATIOS, + backbone_shapes, + config.BACKBONE_STRIDES, + config.RPN_ANCHOR_STRIDE) + + # Keras requires a generator to run indefinitely. + while True: + try: + # Increment index to pick next image. Shuffle if at the start of an epoch. + image_index = (image_index + 1) % len(image_ids) + if shuffle and image_index == 0: + np.random.shuffle(image_ids) + + # Get GT bounding boxes and masks for image. + image_id = image_ids[image_index] + + # If the image source is not to be augmented pass None as augmentation + if dataset.image_info[image_id]['source'] in no_augmentation_sources: + image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ + load_image_gt(dataset, config, image_id, augment=augment, + augmentation=None, + use_mini_mask=config.USE_MINI_MASK) + else: + image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ + load_image_gt(dataset, config, image_id, augment=augment, + augmentation=augmentation, + use_mini_mask=config.USE_MINI_MASK) + + # Skip images that have no instances. This can happen in cases + # where we train on a subset of classes and the image doesn't + # have any of the classes we care about. 
+ if not np.any(gt_class_ids > 0): + continue + + # RPN Targets + rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors, + gt_class_ids, gt_boxes, config) + + # Mask R-CNN Targets + if random_rois: + rpn_rois = generate_random_rois( + image.shape, random_rois, gt_class_ids, gt_boxes) + if detection_targets: + rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\ + build_detection_targets( + rpn_rois, gt_class_ids, gt_boxes, gt_masks, config) + + # Init batch arrays + if b == 0: + batch_image_meta = np.zeros( + (batch_size,) + image_meta.shape, dtype=image_meta.dtype) + batch_rpn_match = np.zeros( + [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype) + batch_rpn_bbox = np.zeros( + [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype) + batch_images = np.zeros( + (batch_size,) + image.shape, dtype=np.float32) + batch_gt_class_ids = np.zeros( + (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32) + batch_gt_boxes = np.zeros( + (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32) + batch_gt_masks = np.zeros( + (batch_size, gt_masks.shape[0], gt_masks.shape[1], + config.MAX_GT_INSTANCES), dtype=gt_masks.dtype) + if random_rois: + batch_rpn_rois = np.zeros( + (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype) + if detection_targets: + batch_rois = np.zeros( + (batch_size,) + rois.shape, dtype=rois.dtype) + batch_mrcnn_class_ids = np.zeros( + (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype) + batch_mrcnn_bbox = np.zeros( + (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype) + batch_mrcnn_mask = np.zeros( + (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype) + + # If more instances than fits in the array, sub-sample from them. + if gt_boxes.shape[0] > config.MAX_GT_INSTANCES: + ids = np.random.choice( + np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False) + gt_class_ids = gt_class_ids[ids] + gt_boxes = gt_boxes[ids] + gt_masks = gt_masks[:, :, ids] + + # Add to batch + batch_image_meta[b] = image_meta + batch_rpn_match[b] = rpn_match[:, np.newaxis] + batch_rpn_bbox[b] = rpn_bbox + batch_images[b] = mold_image(image.astype(np.float32), config) + batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids + batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes + batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks + if random_rois: + batch_rpn_rois[b] = rpn_rois + if detection_targets: + batch_rois[b] = rois + batch_mrcnn_class_ids[b] = mrcnn_class_ids + batch_mrcnn_bbox[b] = mrcnn_bbox + batch_mrcnn_mask[b] = mrcnn_mask + b += 1 + + # Batch full? 
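+                # Rough shape guide for the yielded batch (illustrative): with
+                # batch_size=1, batch_images is (1, H, W, 3), batch_rpn_match
+                # is (1, num_anchors, 1), batch_rpn_bbox is
+                # (1, RPN_TRAIN_ANCHORS_PER_IMAGE, 4), and the gt arrays are
+                # padded to MAX_GT_INSTANCES; outputs stays empty unless
+                # detection_targets is True.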
+ if b >= batch_size: + inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, + batch_gt_class_ids, batch_gt_boxes, batch_gt_masks] + outputs = [] + + if random_rois: + inputs.extend([batch_rpn_rois]) + if detection_targets: + inputs.extend([batch_rois]) + # Keras requires that output and targets have the same number of dimensions + batch_mrcnn_class_ids = np.expand_dims( + batch_mrcnn_class_ids, -1) + outputs.extend( + [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask]) + + yield inputs, outputs + + # start a new batch + b = 0 + except (GeneratorExit, KeyboardInterrupt): + raise + except: + # Log it and skip the image + logging.exception("Error processing image {}".format( + dataset.image_info[image_id])) + error_count += 1 + if error_count > 5: + raise + + +############################################################ +# MaskRCNN Class +############################################################ + +class MaskRCNN(): + """Encapsulates the Mask RCNN model functionality. + The actual Keras model is in the keras_model property. + """ + + def __init__(self, mode, config, model_dir): + """ + mode: Either "training" or "inference" + config: A Sub-class of the Config class + model_dir: Directory to save training logs and trained weights + """ + assert mode in ['training', 'inference'] + self.mode = mode + self.config = config + self.model_dir = model_dir + self.set_log_dir() + self.keras_model = self.build(mode=mode, config=config) + + def build(self, mode, config): + """Build Mask R-CNN architecture. + input_shape: The shape of the input image. + mode: Either "training" or "inference". The inputs and + outputs of the model differ accordingly. + """ + assert mode in ['training', 'inference'] + + # Image size must be dividable by 2 multiple times + h, w = config.IMAGE_SHAPE[:2] + if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): + raise Exception("Image size must be dividable by 2 at least 6 times " + "to avoid fractions when downscaling and upscaling." + "For example, use 256, 320, 384, 448, 512, ... etc. ") + + # Inputs + input_image = KL.Input( + shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image") + input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE], + name="input_image_meta") + if mode == "training": + # RPN GT + input_rpn_match = KL.Input( + shape=[None, 1], name="input_rpn_match", dtype=tf.int32) + input_rpn_bbox = KL.Input( + shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32) + + # Detection GT (class IDs, bounding boxes, and masks) + # 1. GT Class IDs (zero padded) + input_gt_class_ids = KL.Input( + shape=[None], name="input_gt_class_ids", dtype=tf.int32) + # 2. GT Boxes in pixels (zero padded) + # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates + input_gt_boxes = KL.Input( + shape=[None, 4], name="input_gt_boxes", dtype=tf.float32) + # Normalize coordinates + gt_boxes = KL.Lambda(lambda x: norm_boxes_graph( + x, K.shape(input_image)[1:3]))(input_gt_boxes) + # 3. GT Masks (zero padded) + # [batch, height, width, MAX_GT_INSTANCES] + if config.USE_MINI_MASK: + input_gt_masks = KL.Input( + shape=[config.MINI_MASK_SHAPE[0], + config.MINI_MASK_SHAPE[1], None], + name="input_gt_masks", dtype=bool) + else: + input_gt_masks = KL.Input( + shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None], + name="input_gt_masks", dtype=bool) + elif mode == "inference": + # Anchors in normalized coordinates + input_anchors = KL.Input(shape=[None, 4], name="input_anchors") + + # Build the shared convolutional layers. 
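+        # Size intuition (illustrative): for a 1024x1024 input, the ResNet
+        # stages C2-C5 have spatial sizes 256, 128, 64 and 32 (strides 4, 8,
+        # 16, 32), and P6 below is a stride-2 subsample of P5, i.e. 16x16.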
+ # Bottom-up Layers + # Returns a list of the last layers of each stage, 5 in total. + # Don't create the thead (stage 5), so we pick the 4th item in the list. + if callable(config.BACKBONE): + _, C2, C3, C4, C5 = config.BACKBONE(input_image, stage5=True, + train_bn=config.TRAIN_BN) + else: + _, C2, C3, C4, C5 = resnet_graph(input_image, config.BACKBONE, + stage5=True, train_bn=config.TRAIN_BN) + # Top-down Layers + # TODO: add assert to varify feature map sizes match what's in config + P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5) + P4 = KL.Add(name="fpn_p4add")([ + KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), + KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)]) + P3 = KL.Add(name="fpn_p3add")([ + KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), + KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)]) + P2 = KL.Add(name="fpn_p2add")([ + KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), + KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)]) + # Attach 3x3 conv to all P layers to get the final feature maps. + P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2")(P2) + P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3")(P3) + P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4")(P4) + P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5")(P5) + # P6 is used for the 5th anchor scale in RPN. Generated by + # subsampling from P5 with stride of 2. + P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) + + # Note that P6 is used in RPN, but not in the classifier heads. + rpn_feature_maps = [P2, P3, P4, P5, P6] + mrcnn_feature_maps = [P2, P3, P4, P5] + + # Anchors + if mode == "training": + anchors = self.get_anchors(config.IMAGE_SHAPE) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape) + # A hack to get around Keras's bad support for constants + anchors = KL.Lambda(lambda x: tf.Variable(anchors), name="anchors")(input_image) + else: + anchors = input_anchors + + # RPN Model + rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE, + len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE) + # Loop through pyramid layers + layer_outputs = [] # list of lists + for p in rpn_feature_maps: + layer_outputs.append(rpn([p])) + # Concatenate layer outputs + # Convert from list of lists of level outputs to list of lists + # of outputs across levels. + # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] + output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] + outputs = list(zip(*layer_outputs)) + outputs = [KL.Concatenate(axis=1, name=n)(list(o)) + for o, n in zip(outputs, output_names)] + + rpn_class_logits, rpn_class, rpn_bbox = outputs + + # Generate proposals + # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates + # and zero padded. + proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\ + else config.POST_NMS_ROIS_INFERENCE + rpn_rois = ProposalLayer( + proposal_count=proposal_count, + nms_threshold=config.RPN_NMS_THRESHOLD, + name="ROI", + config=config)([rpn_class, rpn_bbox, anchors]) + + if mode == "training": + # Class ID mask to mark class IDs supported by the dataset the image + # came from. 
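+            # active_class_ids is a per-image 0/1 vector of length NUM_CLASSES
+            # parsed from image_meta (e.g. length 81 for a COCO-style setup);
+            # mrcnn_class_loss_graph uses it to ignore predictions for classes
+            # that are not part of the image's source dataset.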
+ active_class_ids = KL.Lambda( + lambda x: parse_image_meta_graph(x)["active_class_ids"] + )(input_image_meta) + + if not config.USE_RPN_ROIS: + # Ignore predicted ROIs and use ROIs provided as an input. + input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4], + name="input_roi", dtype=np.int32) + # Normalize coordinates + target_rois = KL.Lambda(lambda x: norm_boxes_graph( + x, K.shape(input_image)[1:3]))(input_rois) + else: + target_rois = rpn_rois + + # Generate detection targets + # Subsamples proposals and generates target outputs for training + # Note that proposal class IDs, gt_boxes, and gt_masks are zero + # padded. Equally, returned rois and targets are zero padded. + rois, target_class_ids, target_bbox, target_mask =\ + DetectionTargetLayer(config, name="proposal_targets")([ + target_rois, input_gt_class_ids, gt_boxes, input_gt_masks]) + + # Network Heads + # TODO: verify that this handles zero padded ROIs + mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ + fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta, + config.POOL_SIZE, config.NUM_CLASSES, + train_bn=config.TRAIN_BN, + fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) + + mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps, + input_image_meta, + config.MASK_POOL_SIZE, + config.NUM_CLASSES, + train_bn=config.TRAIN_BN) + + # TODO: clean up (use tf.identify if necessary) + output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois) + + # Losses + rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")( + [input_rpn_match, rpn_class_logits]) + rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")( + [input_rpn_bbox, input_rpn_match, rpn_bbox]) + class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")( + [target_class_ids, mrcnn_class_logits, active_class_ids]) + bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")( + [target_bbox, target_class_ids, mrcnn_bbox]) + mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")( + [target_mask, target_class_ids, mrcnn_mask]) + + # Model + inputs = [input_image, input_image_meta, + input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks] + if not config.USE_RPN_ROIS: + inputs.append(input_rois) + outputs = [rpn_class_logits, rpn_class, rpn_bbox, + mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask, + rpn_rois, output_rois, + rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss] + model = KM.Model(inputs, outputs, name='mask_rcnn') + else: + # Network Heads + # Proposal classifier and BBox regressor heads + mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ + fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta, + config.POOL_SIZE, config.NUM_CLASSES, + train_bn=config.TRAIN_BN, + fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) + + # Detections + # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in + # normalized coordinates + detections = DetectionLayer(config, name="mrcnn_detection")( + [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta]) + + # Create masks for detections + detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections) + mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps, + input_image_meta, + config.MASK_POOL_SIZE, + config.NUM_CLASSES, + train_bn=config.TRAIN_BN) + + model = KM.Model([input_image, input_image_meta, input_anchors], + [detections, mrcnn_class, mrcnn_bbox, + mrcnn_mask, 
rpn_rois, rpn_class, rpn_bbox], + name='mask_rcnn') + + # Add multi-GPU support. + if config.GPU_COUNT > 1: + from mrcnn.parallel_model import ParallelModel + model = ParallelModel(model, config.GPU_COUNT) + + return model + + def find_last(self): + """Finds the last checkpoint file of the last trained model in the + model directory. + Returns: + The path of the last checkpoint file + """ + # Get directory names. Each directory corresponds to a model + dir_names = next(os.walk(self.model_dir))[1] + key = self.config.NAME.lower() + dir_names = filter(lambda f: f.startswith(key), dir_names) + dir_names = sorted(dir_names) + if not dir_names: + import errno + raise FileNotFoundError( + errno.ENOENT, + "Could not find model directory under {}".format(self.model_dir)) + # Pick last directory + dir_name = os.path.join(self.model_dir, dir_names[-1]) + # Find the last checkpoint + checkpoints = next(os.walk(dir_name))[2] + checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints) + checkpoints = sorted(checkpoints) + if not checkpoints: + import errno + raise FileNotFoundError( + errno.ENOENT, "Could not find weight files in {}".format(dir_name)) + checkpoint = os.path.join(dir_name, checkpoints[-1]) + return checkpoint + + def load_weights(self, filepath, by_name=False, exclude=None): + """Modified version of the corresponding Keras function with + the addition of multi-GPU support and the ability to exclude + some layers from loading. + exclude: list of layer names to exclude + """ + import h5py + # Conditional import to support versions of Keras before 2.2 + # TODO: remove in about 6 months (end of 2018) + try: + from keras.engine import saving + except ImportError: + # Keras before 2.2 used the 'topology' namespace. + from keras.engine import topology as saving + + if exclude: + by_name = True + + if h5py is None: + raise ImportError('`load_weights` requires h5py.') + f = h5py.File(filepath, mode='r') + if 'layer_names' not in f.attrs and 'model_weights' in f: + f = f['model_weights'] + + # In multi-GPU training, we wrap the model. Get layers + # of the inner model because they have the weights. + keras_model = self.keras_model + layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ + else keras_model.layers + + # Exclude some layers + if exclude: + layers = filter(lambda l: l.name not in exclude, layers) + + if by_name: + saving.load_weights_from_hdf5_group_by_name(f, layers) + else: + saving.load_weights_from_hdf5_group(f, layers) + if hasattr(f, 'close'): + f.close() + + # Update the log directory + self.set_log_dir(filepath) + + def get_imagenet_weights(self): + """Downloads ImageNet trained weights from Keras. + Returns path to weights file. + """ + from keras.utils.data_utils import get_file + TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\ + 'releases/download/v0.2/'\ + 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' + weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', + TF_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='a268eb855778b3df3c7506639542a6af') + return weights_path + + def compile(self, learning_rate, momentum): + """Gets the model ready for training. Adds losses, regularization, and + metrics. Then calls the Keras compile() function. 
+ """ + # Optimizer object + optimizer = keras.optimizers.SGD( + lr=learning_rate, momentum=momentum, + clipnorm=self.config.GRADIENT_CLIP_NORM) + # Add Losses + # First, clear previously set losses to avoid duplication + self.keras_model._losses = [] + self.keras_model._per_input_losses = {} + loss_names = [ + "rpn_class_loss", "rpn_bbox_loss", + "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"] + for name in loss_names: + layer = self.keras_model.get_layer(name) + if layer.output in self.keras_model.losses: + continue + loss = ( + tf.reduce_mean(layer.output, keepdims=True) + * self.config.LOSS_WEIGHTS.get(name, 1.)) + self.keras_model.add_loss(loss) + + # Add L2 Regularization + # Skip gamma and beta weights of batch normalization layers. + reg_losses = [ + keras.regularizers.l2(self.config.WEIGHT_DECAY)(w) / tf.cast(tf.size(w), tf.float32) + for w in self.keras_model.trainable_weights + if 'gamma' not in w.name and 'beta' not in w.name] + self.keras_model.add_loss(tf.add_n(reg_losses)) + + # Compile + self.keras_model.compile( + optimizer=optimizer, + loss=[None] * len(self.keras_model.outputs)) + + # Add metrics for losses + for name in loss_names: + if name in self.keras_model.metrics_names: + continue + layer = self.keras_model.get_layer(name) + self.keras_model.metrics_names.append(name) + loss = ( + tf.reduce_mean(layer.output, keepdims=True) + * self.config.LOSS_WEIGHTS.get(name, 1.)) + self.keras_model.metrics_tensors.append(loss) + + def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1): + """Sets model layers as trainable if their names match + the given regular expression. + """ + # Print message on the first call (but not on recursive calls) + if verbose > 0 and keras_model is None: + log("Selecting layers to train") + + keras_model = keras_model or self.keras_model + + # In multi-GPU training, we wrap the model. Get layers + # of the inner model because they have the weights. + layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ + else keras_model.layers + + for layer in layers: + # Is the layer a model? + if layer.__class__.__name__ == 'Model': + print("In model: ", layer.name) + self.set_trainable( + layer_regex, keras_model=layer, indent=indent + 4) + continue + + if not layer.weights: + continue + # Is it trainable? + trainable = bool(re.fullmatch(layer_regex, layer.name)) + # Update layer. If layer is a container, update inner layer. + if layer.__class__.__name__ == 'TimeDistributed': + layer.layer.trainable = trainable + else: + layer.trainable = trainable + # Print trainable layer names + if trainable and verbose > 0: + log("{}{:20} ({})".format(" " * indent, layer.name, + layer.__class__.__name__)) + + def set_log_dir(self, model_path=None): + """Sets the model log directory and epoch counter. + model_path: If None, or a format different from what this code uses + then set a new log directory and start epochs from 0. Otherwise, + extract the log directory and the epoch counter from the file + name. + """ + # Set date and epoch counter as if starting a new model + self.epoch = 0 + now = datetime.datetime.now() + + # If we have a model path with date and epochs use them + if model_path: + # Continue from we left of. 
Get epoch and date from the file name
+            # A sample model path might look like:
+            # \path\to\logs\coco20171029T2315\mask_rcnn_coco_0001.h5 (Windows)
+            # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 (Linux)
+            regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5"
+            m = re.match(regex, model_path)
+            if m:
+                now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
+                                        int(m.group(4)), int(m.group(5)))
+                # Epoch number in file is 1-based, and in Keras code it's 0-based.
+                # So, adjust for that then increment by one to start from the next epoch
+                self.epoch = int(m.group(6)) - 1 + 1
+                print('Re-starting from epoch %d' % self.epoch)
+
+        # Directory for training logs
+        self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format(
+            self.config.NAME.lower(), now))
+
+        # Path to save after each epoch. Include placeholders that get filled by Keras.
+        self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.h5".format(
+            self.config.NAME.lower()))
+        self.checkpoint_path = self.checkpoint_path.replace(
+            "*epoch*", "{epoch:04d}")
+
+    def train(self, train_dataset, val_dataset, learning_rate, epochs, layers,
+              augmentation=None, custom_callbacks=None, no_augmentation_sources=None):
+        """Train the model.
+        train_dataset, val_dataset: Training and validation Dataset objects.
+        learning_rate: The learning rate to train with
+        epochs: Number of training epochs. Note that previous training epochs
+            are considered to be done already, so this actually determines
+            the epochs to train in total rather than in this particular
+            call.
+        layers: Allows selecting which layers to train. It can be:
+            - A regular expression to match layer names to train
+            - One of these predefined values:
+              heads: The RPN, classifier and mask heads of the network
+              all: All the layers
+              3+: Train Resnet stage 3 and up
+              4+: Train Resnet stage 4 and up
+              5+: Train Resnet stage 5 and up
+        augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
+            augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
+            flips images right/left 50% of the time. You can pass complex
+            augmentations as well. This augmentation applies 50% of the
+            time, and when it does it flips images right/left half the time
+            and adds a Gaussian blur with a random sigma in range 0 to 5.
+                augmentation = imgaug.augmenters.Sometimes(0.5, [
+                    imgaug.augmenters.Fliplr(0.5),
+                    imgaug.augmenters.GaussianBlur(sigma=(0.0, 5.0))
+                ])
+        custom_callbacks: Optional. Add custom callbacks to be called
+            with the keras fit_generator method. Must be a list of type keras.callbacks.
+        no_augmentation_sources: Optional. List of sources to exclude for
+            augmentation. A source is a string that identifies a dataset and is
+            defined in the Dataset class.
+        """
+        assert self.mode == "training", "Create model in training mode."
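+        # Usage sketch (illustrative only; dataset_train, dataset_val and
+        # logs_dir are placeholder names):
+        #   model = MaskRCNN(mode="training", config=config, model_dir=logs_dir)
+        #   model.train(dataset_train, dataset_val,
+        #               learning_rate=config.LEARNING_RATE,
+        #               epochs=30, layers="heads")
+        # "heads" maps to the regex table below and trains only the RPN, FPN
+        # and Mask R-CNN heads; "all" or a custom regex widens the selection.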
+ + # Pre-defined layer regular expressions + layer_regex = { + # all layers but the backbone + "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + # From a specific Resnet stage and up + "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", + # All layers + "all": ".*", + } + if layers in layer_regex.keys(): + layers = layer_regex[layers] + + # Data generators + train_generator = data_generator(train_dataset, self.config, shuffle=True, + augmentation=augmentation, + batch_size=self.config.BATCH_SIZE, + no_augmentation_sources=no_augmentation_sources) + val_generator = data_generator(val_dataset, self.config, shuffle=True, + batch_size=self.config.BATCH_SIZE) + + # Create log_dir if it does not exist + if not os.path.exists(self.log_dir): + os.makedirs(self.log_dir) + + # Callbacks + callbacks = [ + keras.callbacks.TensorBoard(log_dir=self.log_dir, + histogram_freq=0, write_graph=True, write_images=False), + keras.callbacks.ModelCheckpoint(self.checkpoint_path, + verbose=0, save_weights_only=True), + ] + + # Add custom callbacks to the list + if custom_callbacks: + callbacks += custom_callbacks + + # Train + log("\nStarting at epoch {}. LR={}\n".format(self.epoch, learning_rate)) + log("Checkpoint Path: {}".format(self.checkpoint_path)) + self.set_trainable(layers) + self.compile(learning_rate, self.config.LEARNING_MOMENTUM) + + # Work-around for Windows: Keras fails on Windows when using + # multiprocessing workers. See discussion here: + # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009 + if os.name is 'nt': + workers = 0 + else: + workers = multiprocessing.cpu_count() + + self.keras_model.fit_generator( + train_generator, + initial_epoch=self.epoch, + epochs=epochs, + steps_per_epoch=self.config.STEPS_PER_EPOCH, + callbacks=callbacks, + validation_data=val_generator, + validation_steps=self.config.VALIDATION_STEPS, + max_queue_size=100, + workers=workers, + use_multiprocessing=True, + ) + self.epoch = max(self.epoch, epochs) + + def mold_inputs(self, images): + """Takes a list of images and modifies them to the format expected + as an input to the neural network. + images: List of image matrices [height,width,depth]. Images can have + different sizes. + Returns 3 Numpy matrices: + molded_images: [N, h, w, 3]. Images resized and normalized. + image_metas: [N, length of meta data]. Details about each image. + windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the + original image (padding excluded). 
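+        For illustration, with IMAGE_RESIZE_MODE "square" and IMAGE_MAX_DIM
+        1024, a 600x800 input is scaled and zero padded to 1024x1024, and its
+        window marks where the real pixels sit inside the padded canvas.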
+ """ + molded_images = [] + image_metas = [] + windows = [] + for image in images: + # Resize image + # TODO: move resizing to mold_image() + molded_image, window, scale, padding, crop = utils.resize_image( + image, + min_dim=self.config.IMAGE_MIN_DIM, + min_scale=self.config.IMAGE_MIN_SCALE, + max_dim=self.config.IMAGE_MAX_DIM, + mode=self.config.IMAGE_RESIZE_MODE) + molded_image = mold_image(molded_image, self.config) + # Build image_meta + image_meta = compose_image_meta( + 0, image.shape, molded_image.shape, window, scale, + np.zeros([self.config.NUM_CLASSES], dtype=np.int32)) + # Append + molded_images.append(molded_image) + windows.append(window) + image_metas.append(image_meta) + # Pack into arrays + molded_images = np.stack(molded_images) + image_metas = np.stack(image_metas) + windows = np.stack(windows) + return molded_images, image_metas, windows + + def unmold_detections(self, detections, mrcnn_mask, original_image_shape, + image_shape, window): + """Reformats the detections of one image from the format of the neural + network output to a format suitable for use in the rest of the + application. + detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates + mrcnn_mask: [N, height, width, num_classes] + original_image_shape: [H, W, C] Original image shape before resizing + image_shape: [H, W, C] Shape of the image after resizing and padding + window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real + image is excluding the padding. + Returns: + boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels + class_ids: [N] Integer class IDs for each bounding box + scores: [N] Float probability scores of the class_id + masks: [height, width, num_instances] Instance masks + """ + # How many detections do we have? + # Detections array is padded with zeros. Find the first class_id == 0. + zero_ix = np.where(detections[:, 4] == 0)[0] + N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] + + # Extract boxes, class_ids, scores, and class-specific masks + boxes = detections[:N, :4] + class_ids = detections[:N, 4].astype(np.int32) + scores = detections[:N, 5] + masks = mrcnn_mask[np.arange(N), :, :, class_ids] + + # Translate normalized coordinates in the resized image to pixel + # coordinates in the original image before resizing + window = utils.norm_boxes(window, image_shape[:2]) + wy1, wx1, wy2, wx2 = window + shift = np.array([wy1, wx1, wy1, wx1]) + wh = wy2 - wy1 # window height + ww = wx2 - wx1 # window width + scale = np.array([wh, ww, wh, ww]) + # Convert boxes to normalized coordinates on the window + boxes = np.divide(boxes - shift, scale) + # Convert boxes to pixel coordinates on the original image + boxes = utils.denorm_boxes(boxes, original_image_shape[:2]) + + # Filter out detections with zero area. Happens in early training when + # network weights are still random + exclude_ix = np.where( + (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] + if exclude_ix.shape[0] > 0: + boxes = np.delete(boxes, exclude_ix, axis=0) + class_ids = np.delete(class_ids, exclude_ix, axis=0) + scores = np.delete(scores, exclude_ix, axis=0) + masks = np.delete(masks, exclude_ix, axis=0) + N = class_ids.shape[0] + + # Resize masks to original image size and set boundary threshold. 
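+        # Each low-resolution mask from the mask head (e.g. 28x28) is upscaled
+        # to its box and binarized inside utils.unmold_mask; the threshold
+        # value itself is defined in utils, not here.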
+ full_masks = [] + for i in range(N): + # Convert neural network mask to full size mask + full_mask = utils.unmold_mask(masks[i], boxes[i], original_image_shape) + full_masks.append(full_mask) + full_masks = np.stack(full_masks, axis=-1)\ + if full_masks else np.empty(original_image_shape[:2] + (0,)) + + return boxes, class_ids, scores, full_masks + + def detect(self, images, verbose=0): + """Runs the detection pipeline. + images: List of images, potentially of different sizes. + Returns a list of dicts, one dict per image. The dict contains: + rois: [N, (y1, x1, y2, x2)] detection bounding boxes + class_ids: [N] int class IDs + scores: [N] float probability scores for the class IDs + masks: [H, W, N] instance binary masks + """ + assert self.mode == "inference", "Create model in inference mode." + assert len( + images) == self.config.BATCH_SIZE, "len(images) must be equal to BATCH_SIZE" + + if verbose: + log("Processing {} images".format(len(images))) + for image in images: + log("image", image) + + # Mold inputs to format expected by the neural network + molded_images, image_metas, windows = self.mold_inputs(images) + + # Validate image sizes + # All images in a batch MUST be of the same size + image_shape = molded_images[0].shape + for g in molded_images[1:]: + assert g.shape == image_shape,\ + "After resizing, all images must have the same size. Check IMAGE_RESIZE_MODE and image sizes." + + # Anchors + anchors = self.get_anchors(image_shape) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) + + if verbose: + log("molded_images", molded_images) + log("image_metas", image_metas) + log("anchors", anchors) + # Run object detection + detections, _, _, mrcnn_mask, _, _, _ =\ + self.keras_model.predict([molded_images, image_metas, anchors], verbose=0) + # Process detections + results = [] + for i, image in enumerate(images): + final_rois, final_class_ids, final_scores, final_masks =\ + self.unmold_detections(detections[i], mrcnn_mask[i], + image.shape, molded_images[i].shape, + windows[i]) + results.append({ + "rois": final_rois, + "class_ids": final_class_ids, + "scores": final_scores, + "masks": final_masks, + }) + return results + + def detect_molded(self, molded_images, image_metas, verbose=0): + """Runs the detection pipeline, but expect inputs that are + molded already. Used mostly for debugging and inspecting + the model. + molded_images: List of images loaded using load_image_gt() + image_metas: image meta data, also returned by load_image_gt() + Returns a list of dicts, one dict per image. The dict contains: + rois: [N, (y1, x1, y2, x2)] detection bounding boxes + class_ids: [N] int class IDs + scores: [N] float probability scores for the class IDs + masks: [H, W, N] instance binary masks + """ + assert self.mode == "inference", "Create model in inference mode." 
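+        # "Molded" means the images are already resized, padded and
+        # mean-subtracted (e.g. via load_image_gt() or mold_inputs()), and
+        # image_metas must be the matching metadata from compose_image_meta().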
+ assert len(molded_images) == self.config.BATCH_SIZE,\ + "Number of images must be equal to BATCH_SIZE" + + if verbose: + log("Processing {} images".format(len(molded_images))) + for image in molded_images: + log("image", image) + + # Validate image sizes + # All images in a batch MUST be of the same size + image_shape = molded_images[0].shape + for g in molded_images[1:]: + assert g.shape == image_shape, "Images must have the same size" + + # Anchors + anchors = self.get_anchors(image_shape) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) + + if verbose: + log("molded_images", molded_images) + log("image_metas", image_metas) + log("anchors", anchors) + # Run object detection + detections, _, _, mrcnn_mask, _, _, _ =\ + self.keras_model.predict([molded_images, image_metas, anchors], verbose=0) + # Process detections + results = [] + for i, image in enumerate(molded_images): + window = [0, 0, image.shape[0], image.shape[1]] + final_rois, final_class_ids, final_scores, final_masks =\ + self.unmold_detections(detections[i], mrcnn_mask[i], + image.shape, molded_images[i].shape, + window) + results.append({ + "rois": final_rois, + "class_ids": final_class_ids, + "scores": final_scores, + "masks": final_masks, + }) + return results + + def get_anchors(self, image_shape): + """Returns anchor pyramid for the given image size.""" + backbone_shapes = compute_backbone_shapes(self.config, image_shape) + # Cache anchors and reuse if image shape is the same + if not hasattr(self, "_anchor_cache"): + self._anchor_cache = {} + if not tuple(image_shape) in self._anchor_cache: + # Generate Anchors + a = utils.generate_pyramid_anchors( + self.config.RPN_ANCHOR_SCALES, + self.config.RPN_ANCHOR_RATIOS, + backbone_shapes, + self.config.BACKBONE_STRIDES, + self.config.RPN_ANCHOR_STRIDE) + # Keep a copy of the latest anchors in pixel coordinates because + # it's used in inspect_model notebooks. + # TODO: Remove this after the notebook are refactored to not use it + self.anchors = a + # Normalize coordinates + self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(a, image_shape[:2]) + return self._anchor_cache[tuple(image_shape)] + + def ancestor(self, tensor, name, checked=None): + """Finds the ancestor of a TF tensor in the computation graph. + tensor: TensorFlow symbolic tensor. + name: Name of ancestor tensor to find + checked: For internal use. A list of tensors that were already + searched to avoid loops in traversing the graph. + """ + checked = checked if checked is not None else [] + # Put a limit on how deep we go to avoid very long loops + if len(checked) > 500: + return None + # Convert name to a regex and allow matching a number prefix + # because Keras adds them automatically + if isinstance(name, str): + name = re.compile(name.replace("/", r"(\_\d+)*/")) + + parents = tensor.op.inputs + for p in parents: + if p in checked: + continue + if bool(re.fullmatch(name, p.name)): + return p + checked.append(p) + a = self.ancestor(p, name, checked) + if a is not None: + return a + return None + + def find_trainable_layer(self, layer): + """If a layer is encapsulated by another layer, this function + digs through the encapsulation and returns the layer that holds + the weights. 
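+        For example, a TimeDistributed wrapper around a Conv2D resolves to the
+        inner Conv2D layer.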
+ """ + if layer.__class__.__name__ == 'TimeDistributed': + return self.find_trainable_layer(layer.layer) + return layer + + def get_trainable_layers(self): + """Returns a list of layers that have weights.""" + layers = [] + # Loop through all layers + for l in self.keras_model.layers: + # If layer is a wrapper, find inner trainable layer + l = self.find_trainable_layer(l) + # Include layer if it has weights + if l.get_weights(): + layers.append(l) + return layers + + def run_graph(self, images, outputs, image_metas=None): + """Runs a sub-set of the computation graph that computes the given + outputs. + image_metas: If provided, the images are assumed to be already + molded (i.e. resized, padded, and normalized) + outputs: List of tuples (name, tensor) to compute. The tensors are + symbolic TensorFlow tensors and the names are for easy tracking. + Returns an ordered dict of results. Keys are the names received in the + input and values are Numpy arrays. + """ + model = self.keras_model + + # Organize desired outputs into an ordered dict + outputs = OrderedDict(outputs) + for o in outputs.values(): + assert o is not None + + # Build a Keras function to run parts of the computation graph + inputs = model.inputs + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + inputs += [K.learning_phase()] + kf = K.function(model.inputs, list(outputs.values())) + + # Prepare inputs + if image_metas is None: + molded_images, image_metas, _ = self.mold_inputs(images) + else: + molded_images = images + image_shape = molded_images[0].shape + # Anchors + anchors = self.get_anchors(image_shape) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) + model_in = [molded_images, image_metas, anchors] + + # Run inference + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + model_in.append(0.) + outputs_np = kf(model_in) + + # Pack the generated Numpy arrays into a a dict and log the results. + outputs_np = OrderedDict([(k, v) + for k, v in zip(outputs.keys(), outputs_np)]) + for k, v in outputs_np.items(): + log(k, v) + return outputs_np + + +############################################################ +# Data Formatting +############################################################ + +def compose_image_meta(image_id, original_image_shape, image_shape, + window, scale, active_class_ids): + """Takes attributes of an image and puts them in one 1D array. + image_id: An int ID of the image. Useful for debugging. + original_image_shape: [H, W, C] before resizing or padding. + image_shape: [H, W, C] after resizing and padding + window: (y1, x1, y2, x2) in pixels. The area of the image where the real + image is (excluding the padding) + scale: The scaling factor applied to the original image (float32) + active_class_ids: List of class_ids available in the dataset from which + the image came. Useful if training on images from multiple datasets + where not all classes are present in all datasets. + """ + meta = np.array( + [image_id] + # size=1 + list(original_image_shape) + # size=3 + list(image_shape) + # size=3 + list(window) + # size=4 (y1, x1, y2, x2) in image cooredinates + [scale] + # size=1 + list(active_class_ids) # size=num_classes + ) + return meta + + +def parse_image_meta(meta): + """Parses an array that contains image attributes to its components. + See compose_image_meta() for more details. 
+ meta: [batch, meta length] where meta length depends on NUM_CLASSES + Returns a dict of the parsed values. + """ + image_id = meta[:, 0] + original_image_shape = meta[:, 1:4] + image_shape = meta[:, 4:7] + window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels + scale = meta[:, 11] + active_class_ids = meta[:, 12:] + return { + "image_id": image_id.astype(np.int32), + "original_image_shape": original_image_shape.astype(np.int32), + "image_shape": image_shape.astype(np.int32), + "window": window.astype(np.int32), + "scale": scale.astype(np.float32), + "active_class_ids": active_class_ids.astype(np.int32), + } + + +def parse_image_meta_graph(meta): + """Parses a tensor that contains image attributes to its components. + See compose_image_meta() for more details. + meta: [batch, meta length] where meta length depends on NUM_CLASSES + Returns a dict of the parsed tensors. + """ + image_id = meta[:, 0] + original_image_shape = meta[:, 1:4] + image_shape = meta[:, 4:7] + window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels + scale = meta[:, 11] + active_class_ids = meta[:, 12:] + return { + "image_id": image_id, + "original_image_shape": original_image_shape, + "image_shape": image_shape, + "window": window, + "scale": scale, + "active_class_ids": active_class_ids, + } + + +def mold_image(images, config): + """Expects an RGB image (or array of images) and subtracts + the mean pixel and converts it to float. Expects image + colors in RGB order. + """ + return images.astype(np.float32) - config.MEAN_PIXEL + + +def unmold_image(normalized_images, config): + """Takes a image normalized with mold() and returns the original.""" + return (normalized_images + config.MEAN_PIXEL).astype(np.uint8) + + +############################################################ +# Miscellenous Graph Functions +############################################################ + +def trim_zeros_graph(boxes, name='trim_zeros'): + """Often boxes are represented with matrices of shape [N, 4] and + are padded with zeros. This removes zero boxes. + boxes: [N, 4] matrix of boxes. + non_zeros: [N] a 1D boolean mask identifying the rows to keep + """ + non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool) + boxes = tf.boolean_mask(boxes, non_zeros, name=name) + return boxes, non_zeros + + +def batch_pack_graph(x, counts, num_rows): + """Picks different number of values from each row + in x depending on the values in counts. + """ + outputs = [] + for i in range(num_rows): + outputs.append(x[i, :counts[i]]) + return tf.concat(outputs, axis=0) + + +def norm_boxes_graph(boxes, shape): + """Converts boxes from pixel coordinates to normalized coordinates. + boxes: [..., (y1, x1, y2, x2)] in pixel coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. + Returns: + [..., (y1, x1, y2, x2)] in normalized coordinates + """ + h, w = tf.split(tf.cast(shape, tf.float32), 2) + scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) + shift = tf.constant([0., 0., 1., 1.]) + return tf.divide(boxes - shift, scale) + + +def denorm_boxes_graph(boxes, shape): + """Converts boxes from normalized coordinates to pixel coordinates. + boxes: [..., (y1, x1, y2, x2)] in normalized coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. 
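+    This is the inverse of norm_boxes_graph(): boxes are multiplied by
+    (h - 1, w - 1, h - 1, w - 1), shifted by (0, 0, 1, 1) and rounded to
+    integer pixel coordinates.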
+ Returns: + [..., (y1, x1, y2, x2)] in pixel coordinates + """ + h, w = tf.split(tf.cast(shape, tf.float32), 2) + scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) + shift = tf.constant([0., 0., 1., 1.]) + return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32) \ No newline at end of file diff --git a/src/tracker/mask_rcnn/parallel_model.py b/src/tracker/mask_rcnn/parallel_model.py new file mode 100644 index 0000000000000000000000000000000000000000..f521bc0cda35b17844b54c7c910e633acfba8d3a --- /dev/null +++ b/src/tracker/mask_rcnn/parallel_model.py @@ -0,0 +1,173 @@ +""" +Mask R-CNN +Multi-GPU Support for Keras. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +Ideas and a small code snippets from these sources: +https://github.com/fchollet/keras/issues/2436 +https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 +https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ +https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py +""" + +import tensorflow as tf +import keras.backend as K +import keras.layers as KL +import keras.models as KM + + +class ParallelModel(KM.Model): + """Subclasses the standard Keras Model and adds multi-GPU support. + It works by creating a copy of the model on each GPU. Then it slices + the inputs and sends a slice to each copy of the model, and then + merges the outputs together and applies the loss on the combined + outputs. + """ + + def __init__(self, keras_model, gpu_count): + """Class constructor. + keras_model: The Keras model to parallelize + gpu_count: Number of GPUs. Must be > 1 + """ + self.inner_model = keras_model + self.gpu_count = gpu_count + merged_outputs = self.make_parallel() + super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, + outputs=merged_outputs) + + def __getattribute__(self, attrname): + """Redirect loading and saving methods to the inner model. That's where + the weights are stored.""" + if 'load' in attrname or 'save' in attrname: + return getattr(self.inner_model, attrname) + return super(ParallelModel, self).__getattribute__(attrname) + + def summary(self, *args, **kwargs): + """Override summary() to display summaries of both, the wrapper + and inner models.""" + super(ParallelModel, self).summary(*args, **kwargs) + self.inner_model.summary(*args, **kwargs) + + def make_parallel(self): + """Creates a new wrapper model that consists of multiple replicas of + the original model placed on different GPUs. + """ + # Slice inputs. Slice inputs on the CPU to avoid sending a copy + # of the full inputs to all GPUs. Saves on bandwidth and memory. 
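+        # For example, with a global batch of 8 images and gpu_count == 2,
+        # every input tensor of shape [8, ...] is split into two [4, ...]
+        # slices, one per GPU tower below.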
+ input_slices = {name: tf.split(x, self.gpu_count) + for name, x in zip(self.inner_model.input_names, + self.inner_model.inputs)} + + output_names = self.inner_model.output_names + outputs_all = [] + for i in range(len(self.inner_model.outputs)): + outputs_all.append([]) + + # Run the model call() on each GPU to place the ops there + for i in range(self.gpu_count): + with tf.device('/gpu:%d' % i): + with tf.name_scope('tower_%d' % i): + # Run a slice of inputs through this replica + zipped_inputs = zip(self.inner_model.input_names, + self.inner_model.inputs) + inputs = [ + KL.Lambda(lambda s: input_slices[name][i], + output_shape=lambda s: (None,) + s[1:])(tensor) + for name, tensor in zipped_inputs] + # Create the model replica and get the outputs + outputs = self.inner_model(inputs) + if not isinstance(outputs, list): + outputs = [outputs] + # Save the outputs for merging back together later + for l, o in enumerate(outputs): + outputs_all[l].append(o) + + # Merge outputs on CPU + with tf.device('/cpu:0'): + merged = [] + for outputs, name in zip(outputs_all, output_names): + # Concatenate or average outputs? + # Outputs usually have a batch dimension and we concatenate + # across it. If they don't, then the output is likely a loss + # or a metric value that gets averaged across the batch. + # Keras expects losses and metrics to be scalars. + if K.int_shape(outputs[0]) == (): + # Average + m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) + else: + # Concatenate + m = KL.Concatenate(axis=0, name=name)(outputs) + merged.append(m) + return merged + + +if __name__ == "__main__": + # Testing code below. It creates a simple model to train on MNIST and + # tries to run it on 2 GPUs. It saves the graph so it can be viewed + # in TensorBoard. Run it as: + # + # python3 parallel_model.py + + import os + import numpy as np + import keras.optimizers + from keras.datasets import mnist + from keras.preprocessing.image import ImageDataGenerator + + GPU_COUNT = 2 + + # Root directory of the project + ROOT_DIR = os.path.abspath("../") + + # Directory to save logs and trained model + MODEL_DIR = os.path.join(ROOT_DIR, "logs") + + def build_model(x_train, num_classes): + # Reset default graph. Keras leaves old ops in the graph, + # which are ignored for execution but clutter graph + # visualization in TensorBoard. + tf.reset_default_graph() + + inputs = KL.Input(shape=x_train.shape[1:], name="input_image") + x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", + name="conv1")(inputs) + x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", + name="conv2")(x) + x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) + x = KL.Flatten(name="flat1")(x) + x = KL.Dense(128, activation='relu', name="dense1")(x) + x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) + + return KM.Model(inputs, x, "digit_classifier_model") + + # Load MNIST Data + (x_train, y_train), (x_test, y_test) = mnist.load_data() + x_train = np.expand_dims(x_train, -1).astype('float32') / 255 + x_test = np.expand_dims(x_test, -1).astype('float32') / 255 + + print('x_train shape:', x_train.shape) + print('x_test shape:', x_test.shape) + + # Build data generator and model + datagen = ImageDataGenerator() + model = build_model(x_train, 10) + + # Add multi-GPU support. 
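+    # Each of the GPU_COUNT towers receives a slice of every batch
+    # (with batch_size=64 and GPU_COUNT=2 below, that is 32 images per tower).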
+ model = ParallelModel(model, GPU_COUNT) + + optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) + + model.compile(loss='sparse_categorical_crossentropy', + optimizer=optimizer, metrics=['accuracy']) + + model.summary() + + # Train + model.fit_generator( + datagen.flow(x_train, y_train, batch_size=64), + steps_per_epoch=50, epochs=10, verbose=1, + validation_data=(x_test, y_test), + callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, + write_graph=True)] + ) \ No newline at end of file diff --git a/src/tracker/mask_rcnn/utils.py b/src/tracker/mask_rcnn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4a7061230cc45b265ed8cf933b25b4a09ac21c9d --- /dev/null +++ b/src/tracker/mask_rcnn/utils.py @@ -0,0 +1,879 @@ +""" +Mask R-CNN +Common utility functions and classes. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +""" + +import sys +import os +import logging +import math +import random +import numpy as np +import tensorflow as tf +import scipy +import skimage.color +import skimage.io +import skimage.transform +import urllib.request +import shutil +import warnings +from distutils.version import LooseVersion + +# URL from which to download the latest COCO trained weights +COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" + + +############################################################ +# Bounding Boxes +############################################################ + +def extract_bboxes(mask): + """Compute bounding boxes from masks. + mask: [height, width, num_instances]. Mask pixels are either 1 or 0. + Returns: bbox array [num_instances, (y1, x1, y2, x2)]. + """ + boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) + for i in range(mask.shape[-1]): + m = mask[:, :, i] + # Bounding box. + horizontal_indicies = np.where(np.any(m, axis=0))[0] + vertical_indicies = np.where(np.any(m, axis=1))[0] + if horizontal_indicies.shape[0]: + x1, x2 = horizontal_indicies[[0, -1]] + y1, y2 = vertical_indicies[[0, -1]] + # x2 and y2 should not be part of the box. Increment by 1. + x2 += 1 + y2 += 1 + else: + # No mask for this instance. Might happen due to + # resizing or cropping. Set bbox to zeros + x1, x2, y1, y2 = 0, 0, 0, 0 + boxes[i] = np.array([y1, x1, y2, x2]) + return boxes.astype(np.int32) + + +def compute_iou(box, boxes, box_area, boxes_area): + """Calculates IoU of the given box with the array of the given boxes. + box: 1D vector [y1, x1, y2, x2] + boxes: [boxes_count, (y1, x1, y2, x2)] + box_area: float. the area of 'box' + boxes_area: array of length boxes_count. + Note: the areas are passed in rather than calculated here for + efficiency. Calculate once in the caller to avoid duplicate work. + """ + # Calculate intersection areas + y1 = np.maximum(box[0], boxes[:, 0]) + y2 = np.minimum(box[2], boxes[:, 2]) + x1 = np.maximum(box[1], boxes[:, 1]) + x2 = np.minimum(box[3], boxes[:, 3]) + intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) + union = box_area + boxes_area[:] - intersection[:] + iou = intersection / union + return iou + + +def compute_overlaps(boxes1, boxes2): + """Computes IoU overlaps between two sets of boxes. + boxes1, boxes2: [N, (y1, x1, y2, x2)]. + For better performance, pass the largest set first and the smaller second. 
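+    Returns an IoU matrix of shape [boxes1 count, boxes2 count].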
+ """ + # Areas of anchors and GT boxes + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) + + # Compute overlaps to generate matrix [boxes1 count, boxes2 count] + # Each cell contains the IoU value. + overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) + for i in range(overlaps.shape[1]): + box2 = boxes2[i] + overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) + return overlaps + + +def compute_overlaps_masks(masks1, masks2): + """Computes IoU overlaps between two sets of masks. + masks1, masks2: [Height, Width, instances] + """ + + # If either set of masks is empty return empty result + if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: + return np.zeros((masks1.shape[-1], masks2.shape[-1])) + # flatten masks and compute their areas + masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) + masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) + area1 = np.sum(masks1, axis=0) + area2 = np.sum(masks2, axis=0) + + # intersections and union + intersections = np.dot(masks1.T, masks2) + union = area1[:, None] + area2[None, :] - intersections + overlaps = intersections / union + + return overlaps + + +def non_max_suppression(boxes, scores, threshold): + """Performs non-maximum suppression and returns indices of kept boxes. + boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. + scores: 1-D array of box scores. + threshold: Float. IoU threshold to use for filtering. + """ + assert boxes.shape[0] > 0 + if boxes.dtype.kind != "f": + boxes = boxes.astype(np.float32) + + # Compute box areas + y1 = boxes[:, 0] + x1 = boxes[:, 1] + y2 = boxes[:, 2] + x2 = boxes[:, 3] + area = (y2 - y1) * (x2 - x1) + + # Get indicies of boxes sorted by scores (highest first) + ixs = scores.argsort()[::-1] + + pick = [] + while len(ixs) > 0: + # Pick top box and add its index to the list + i = ixs[0] + pick.append(i) + # Compute IoU of the picked box with the rest + iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) + # Identify boxes with IoU over the threshold. This + # returns indices into ixs[1:], so add 1 to get + # indices into ixs. + remove_ixs = np.where(iou > threshold)[0] + 1 + # Remove indices of the picked and overlapped boxes. + ixs = np.delete(ixs, remove_ixs) + ixs = np.delete(ixs, 0) + return np.array(pick, dtype=np.int32) + + +def apply_box_deltas(boxes, deltas): + """Applies the given deltas to the given boxes. + boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. + deltas: [N, (dy, dx, log(dh), log(dw))] + """ + boxes = boxes.astype(np.float32) + # Convert to y, x, h, w + height = boxes[:, 2] - boxes[:, 0] + width = boxes[:, 3] - boxes[:, 1] + center_y = boxes[:, 0] + 0.5 * height + center_x = boxes[:, 1] + 0.5 * width + # Apply deltas + center_y += deltas[:, 0] * height + center_x += deltas[:, 1] * width + height *= np.exp(deltas[:, 2]) + width *= np.exp(deltas[:, 3]) + # Convert back to y1, x1, y2, x2 + y1 = center_y - 0.5 * height + x1 = center_x - 0.5 * width + y2 = y1 + height + x2 = x1 + width + return np.stack([y1, x1, y2, x2], axis=1) + + +def box_refinement_graph(box, gt_box): + """Compute refinement needed to transform box to gt_box. 
+ box and gt_box are [N, (y1, x1, y2, x2)] + """ + box = tf.cast(box, tf.float32) + gt_box = tf.cast(gt_box, tf.float32) + + height = box[:, 2] - box[:, 0] + width = box[:, 3] - box[:, 1] + center_y = box[:, 0] + 0.5 * height + center_x = box[:, 1] + 0.5 * width + + gt_height = gt_box[:, 2] - gt_box[:, 0] + gt_width = gt_box[:, 3] - gt_box[:, 1] + gt_center_y = gt_box[:, 0] + 0.5 * gt_height + gt_center_x = gt_box[:, 1] + 0.5 * gt_width + + dy = (gt_center_y - center_y) / height + dx = (gt_center_x - center_x) / width + dh = tf.log(gt_height / height) + dw = tf.log(gt_width / width) + + result = tf.stack([dy, dx, dh, dw], axis=1) + return result + + +def box_refinement(box, gt_box): + """Compute refinement needed to transform box to gt_box. + box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is + assumed to be outside the box. + """ + box = box.astype(np.float32) + gt_box = gt_box.astype(np.float32) + + height = box[:, 2] - box[:, 0] + width = box[:, 3] - box[:, 1] + center_y = box[:, 0] + 0.5 * height + center_x = box[:, 1] + 0.5 * width + + gt_height = gt_box[:, 2] - gt_box[:, 0] + gt_width = gt_box[:, 3] - gt_box[:, 1] + gt_center_y = gt_box[:, 0] + 0.5 * gt_height + gt_center_x = gt_box[:, 1] + 0.5 * gt_width + + dy = (gt_center_y - center_y) / height + dx = (gt_center_x - center_x) / width + dh = np.log(gt_height / height) + dw = np.log(gt_width / width) + + return np.stack([dy, dx, dh, dw], axis=1) + + +############################################################ +# Dataset +############################################################ + +class Dataset(object): + """The base class for dataset classes. + To use it, create a new class that adds functions specific to the dataset + you want to use. For example: + class CatsAndDogsDataset(Dataset): + def load_cats_and_dogs(self): + ... + def load_mask(self, image_id): + ... + def image_reference(self, image_id): + ... + See COCODataset and ShapesDataset as examples. + """ + + def __init__(self, class_map=None): + self._image_ids = [] + self.image_info = [] + # Background is always the first class + self.class_info = [{"source": "", "id": 0, "name": "BG"}] + self.source_class_ids = {} + + def add_class(self, source, class_id, class_name): + assert "." not in source, "Source name cannot contain a dot" + # Does the class exist already? + for info in self.class_info: + if info['source'] == source and info["id"] == class_id: + # source.class_id combination already available, skip + return + # Add the class + self.class_info.append({ + "source": source, + "id": class_id, + "name": class_name, + }) + + def add_image(self, source, image_id, path, **kwargs): + image_info = { + "id": image_id, + "source": source, + "path": path, + } + image_info.update(kwargs) + self.image_info.append(image_info) + + def image_reference(self, image_id): + """Return a link to the image in its source Website or details about + the image that help looking it up or debugging it. + Override for your dataset, but pass to this function + if you encounter images not in your dataset. + """ + return "" + + def prepare(self, class_map=None): + """Prepares the Dataset class for use. + TODO: class map is not supported yet. When done, it should handle mapping + classes from different datasets to the same class ID. + """ + + def clean_name(name): + """Returns a shorter version of object names for cleaner display.""" + return ",".join(name.split(",")[:1]) + + # Build (or rebuild) everything else from the info dicts. 
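+        # For example, class_from_source_map maps a source key such as
+        # "coco.12" to the internal class index used by map_source_class_id().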
+ self.num_classes = len(self.class_info) + self.class_ids = np.arange(self.num_classes) + self.class_names = [clean_name(c["name"]) for c in self.class_info] + self.num_images = len(self.image_info) + self._image_ids = np.arange(self.num_images) + + # Mapping from source class and image IDs to internal IDs + self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id + for info, id in zip(self.class_info, self.class_ids)} + self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id + for info, id in zip(self.image_info, self.image_ids)} + + # Map sources to class_ids they support + self.sources = list(set([i['source'] for i in self.class_info])) + self.source_class_ids = {} + # Loop over datasets + for source in self.sources: + self.source_class_ids[source] = [] + # Find classes that belong to this dataset + for i, info in enumerate(self.class_info): + # Include BG class in all datasets + if i == 0 or source == info['source']: + self.source_class_ids[source].append(i) + + def map_source_class_id(self, source_class_id): + """Takes a source class ID and returns the int class ID assigned to it. + For example: + dataset.map_source_class_id("coco.12") -> 23 + """ + return self.class_from_source_map[source_class_id] + + def get_source_class_id(self, class_id, source): + """Map an internal class ID to the corresponding class ID in the source dataset.""" + info = self.class_info[class_id] + assert info['source'] == source + return info['id'] + + @property + def image_ids(self): + return self._image_ids + + def source_image_link(self, image_id): + """Returns the path or URL to the image. + Override this to return a URL to the image if it's available online for easy + debugging. + """ + return self.image_info[image_id]["path"] + + def load_image(self, image_id): + """Load the specified image and return a [H,W,3] Numpy array. + """ + # Load image + image = skimage.io.imread(self.image_info[image_id]['path']) + # If grayscale. Convert to RGB for consistency. + if image.ndim != 3: + image = skimage.color.gray2rgb(image) + # If has an alpha channel, remove it for consistency + if image.shape[-1] == 4: + image = image[..., :3] + return image + + def load_mask(self, image_id): + """Load instance masks for the given image. + Different datasets use different ways to store masks. Override this + method to load instance masks and return them in the form of am + array of binary masks of shape [height, width, instances]. + Returns: + masks: A bool array of shape [height, width, instance count] with + a binary mask per instance. + class_ids: a 1D array of class IDs of the instance masks. + """ + # Override this function to load a mask from your dataset. + # Otherwise, it returns an empty mask. + logging.warning("You are using the default load_mask(), maybe you need to define your own one.") + mask = np.empty([0, 0, 0]) + class_ids = np.empty([0], np.int32) + return mask, class_ids + + +def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): + """Resizes an image keeping the aspect ratio unchanged. + min_dim: if provided, resizes the image such that it's smaller + dimension == min_dim + max_dim: if provided, ensures that the image longest side doesn't + exceed this value. + min_scale: if provided, ensure that the image is scaled up by at least + this percent even if min_dim doesn't require it. + mode: Resizing mode. + none: No resizing. Return the image unchanged. + square: Resize and pad with zeros to get a square image + of size [max_dim, max_dim]. 
+ pad64: Pads width and height with zeros to make them multiples of 64. + If min_dim or min_scale are provided, it scales the image up + before padding. max_dim is ignored in this mode. + The multiple of 64 is needed to ensure smooth scaling of feature + maps up and down the 6 levels of the FPN pyramid (2**6=64). + crop: Picks random crops from the image. First, scales the image based + on min_dim and min_scale, then picks a random crop of + size min_dim x min_dim. Can be used in training only. + max_dim is not used in this mode. + Returns: + image: the resized image + window: (y1, x1, y2, x2). If max_dim is provided, padding might + be inserted in the returned image. If so, this window is the + coordinates of the image part of the full image (excluding + the padding). The x2, y2 pixels are not included. + scale: The scale factor used to resize the image + padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] + """ + # Keep track of image dtype and return results in the same dtype + image_dtype = image.dtype + # Default window (y1, x1, y2, x2) and default scale == 1. + h, w = image.shape[:2] + window = (0, 0, h, w) + scale = 1 + padding = [(0, 0), (0, 0), (0, 0)] + crop = None + + if mode == "none": + return image, window, scale, padding, crop + + # Scale? + if min_dim: + # Scale up but not down + scale = max(1, min_dim / min(h, w)) + if min_scale and scale < min_scale: + scale = min_scale + + # Does it exceed max dim? + if max_dim and mode == "square": + image_max = max(h, w) + if round(image_max * scale) > max_dim: + scale = max_dim / image_max + + # Resize image using bilinear interpolation + if scale != 1: + image = resize(image, (round(h * scale), round(w * scale)), + preserve_range=True) + + # Need padding or cropping? + if mode == "square": + # Get new height and width + h, w = image.shape[:2] + top_pad = (max_dim - h) // 2 + bottom_pad = max_dim - h - top_pad + left_pad = (max_dim - w) // 2 + right_pad = max_dim - w - left_pad + padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] + image = np.pad(image, padding, mode='constant', constant_values=0) + window = (top_pad, left_pad, h + top_pad, w + left_pad) + elif mode == "pad64": + h, w = image.shape[:2] + # Both sides must be divisible by 64 + assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" + # Height + if h % 64 > 0: + max_h = h - (h % 64) + 64 + top_pad = (max_h - h) // 2 + bottom_pad = max_h - h - top_pad + else: + top_pad = bottom_pad = 0 + # Width + if w % 64 > 0: + max_w = w - (w % 64) + 64 + left_pad = (max_w - w) // 2 + right_pad = max_w - w - left_pad + else: + left_pad = right_pad = 0 + padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] + image = np.pad(image, padding, mode='constant', constant_values=0) + window = (top_pad, left_pad, h + top_pad, w + left_pad) + elif mode == "crop": + # Pick a random crop + h, w = image.shape[:2] + y = random.randint(0, (h - min_dim)) + x = random.randint(0, (w - min_dim)) + crop = (y, x, min_dim, min_dim) + image = image[y:y + min_dim, x:x + min_dim] + window = (0, 0, min_dim, min_dim) + else: + raise Exception("Mode {} not supported".format(mode)) + return image.astype(image_dtype), window, scale, padding, crop + + +def resize_mask(mask, scale, padding, crop=None): + """Resizes a mask using the given scale and padding. + Typically, you get the scale and padding from resize_image() to + ensure both, the image and the mask, are resized consistently. 
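+    The mask is zoomed with order-0 (nearest-neighbour) interpolation so its
+    values stay binary.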
+ scale: mask scaling factor + padding: Padding to add to the mask in the form + [(top, bottom), (left, right), (0, 0)] + """ + # Suppress warning from scipy 0.13.0, the output shape of zoom() is + # calculated with round() instead of int() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) + if crop is not None: + y, x, h, w = crop + mask = mask[y:y + h, x:x + w] + else: + mask = np.pad(mask, padding, mode='constant', constant_values=0) + return mask + + +def minimize_mask(bbox, mask, mini_shape): + """Resize masks to a smaller version to reduce memory load. + Mini-masks can be resized back to image scale using expand_masks() + See inspect_data.ipynb notebook for more details. + """ + mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) + for i in range(mask.shape[-1]): + # Pick slice and cast to bool in case load_mask() returned wrong dtype + m = mask[:, :, i].astype(bool) + y1, x1, y2, x2 = bbox[i][:4] + m = m[y1:y2, x1:x2] + if m.size == 0: + raise Exception("Invalid bounding box with area of zero") + # Resize with bilinear interpolation + m = resize(m, mini_shape) + mini_mask[:, :, i] = np.around(m).astype(np.bool) + return mini_mask + + +def expand_mask(bbox, mini_mask, image_shape): + """Resizes mini masks back to image size. Reverses the change + of minimize_mask(). + See inspect_data.ipynb notebook for more details. + """ + mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) + for i in range(mask.shape[-1]): + m = mini_mask[:, :, i] + y1, x1, y2, x2 = bbox[i][:4] + h = y2 - y1 + w = x2 - x1 + # Resize with bilinear interpolation + m = resize(m, (h, w)) + mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) + return mask + + +# TODO: Build and use this function to reduce code duplication +def mold_mask(mask, config): + pass + + +def unmold_mask(mask, bbox, image_shape): + """Converts a mask generated by the neural network to a format similar + to its original shape. + mask: [height, width] of type float. A small, typically 28x28 mask. + bbox: [y1, x1, y2, x2]. The box to fit the mask in. + Returns a binary mask with the same size as the original image. + """ + threshold = 0.5 + y1, x1, y2, x2 = bbox + mask = resize(mask, (y2 - y1, x2 - x1)) + mask = np.where(mask >= threshold, 1, 0).astype(np.bool) + + # Put the mask in the right location. + full_mask = np.zeros(image_shape[:2], dtype=np.bool) + full_mask[y1:y2, x1:x2] = mask + return full_mask + + +############################################################ +# Anchors +############################################################ + +def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): + """ + scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] + ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] + shape: [height, width] spatial shape of the feature map over which + to generate anchors. + feature_stride: Stride of the feature map relative to the image in pixels. + anchor_stride: Stride of anchors on the feature map. For example, if the + value is 2 then generate anchors for every other feature map pixel. 
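+    Returns:
+    anchors: [N, (y1, x1, y2, x2)] anchor corners in pixel coordinates, one
+        row per (feature cell, scale, ratio) combination.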
+ """ + # Get all combinations of scales and ratios + scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) + scales = scales.flatten() + ratios = ratios.flatten() + + # Enumerate heights and widths from scales and ratios + heights = scales / np.sqrt(ratios) + widths = scales * np.sqrt(ratios) + + # Enumerate shifts in feature space + shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride + shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride + shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) + + # Enumerate combinations of shifts, widths, and heights + box_widths, box_centers_x = np.meshgrid(widths, shifts_x) + box_heights, box_centers_y = np.meshgrid(heights, shifts_y) + + # Reshape to get a list of (y, x) and a list of (h, w) + box_centers = np.stack( + [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) + box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) + + # Convert to corner coordinates (y1, x1, y2, x2) + boxes = np.concatenate([box_centers - 0.5 * box_sizes, + box_centers + 0.5 * box_sizes], axis=1) + return boxes + + +def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, + anchor_stride): + """Generate anchors at different levels of a feature pyramid. Each scale + is associated with a level of the pyramid, but each ratio is used in + all levels of the pyramid. + Returns: + anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted + with the same order of the given scales. So, anchors of scale[0] come + first, then anchors of scale[1], and so on. + """ + # Anchors + # [anchor_count, (y1, x1, y2, x2)] + anchors = [] + for i in range(len(scales)): + anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], + feature_strides[i], anchor_stride)) + return np.concatenate(anchors, axis=0) + + +############################################################ +# Miscellaneous +############################################################ + +def trim_zeros(x): + """It's common to have tensors larger than the available data and + pad with zeros. This function removes rows that are all zeros. + x: [rows, columns]. + """ + assert len(x.shape) == 2 + return x[~np.all(x == 0, axis=1)] + + +def compute_matches(gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold=0.5, score_threshold=0.0): + """Finds matches between prediction and ground truth instances. + Returns: + gt_match: 1-D array. For each GT box it has the index of the matched + predicted box. + pred_match: 1-D array. For each predicted box, it has the index of + the matched ground truth box. + overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
+ """ + # Trim zero padding + # TODO: cleaner to do zero unpadding upstream + gt_boxes = trim_zeros(gt_boxes) + gt_masks = gt_masks[..., :gt_boxes.shape[0]] + pred_boxes = trim_zeros(pred_boxes) + pred_scores = pred_scores[:pred_boxes.shape[0]] + # Sort predictions by score from high to low + indices = np.argsort(pred_scores)[::-1] + pred_boxes = pred_boxes[indices] + pred_class_ids = pred_class_ids[indices] + pred_scores = pred_scores[indices] + pred_masks = pred_masks[..., indices] + + # Compute IoU overlaps [pred_masks, gt_masks] + overlaps = compute_overlaps_masks(pred_masks, gt_masks) + + # Loop through predictions and find matching ground truth boxes + match_count = 0 + pred_match = -1 * np.ones([pred_boxes.shape[0]]) + gt_match = -1 * np.ones([gt_boxes.shape[0]]) + for i in range(len(pred_boxes)): + # Find best matching ground truth box + # 1. Sort matches by score + sorted_ixs = np.argsort(overlaps[i])[::-1] + # 2. Remove low scores + low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] + if low_score_idx.size > 0: + sorted_ixs = sorted_ixs[:low_score_idx[0]] + # 3. Find the match + for j in sorted_ixs: + # If ground truth box is already matched, go to next one + if gt_match[j] > -1: + continue + # If we reach IoU smaller than the threshold, end the loop + iou = overlaps[i, j] + if iou < iou_threshold: + break + # Do we have a match? + if pred_class_ids[i] == gt_class_ids[j]: + match_count += 1 + gt_match[j] = i + pred_match[i] = j + break + + return gt_match, pred_match, overlaps + + +def compute_ap(gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold=0.5): + """Compute Average Precision at a set IoU threshold (default 0.5). + Returns: + mAP: Mean Average Precision + precisions: List of precisions at different class score thresholds. + recalls: List of recall values at different class score thresholds. + overlaps: [pred_boxes, gt_boxes] IoU overlaps. + """ + # Get matches and overlaps + gt_match, pred_match, overlaps = compute_matches( + gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold) + + # Compute precision and recall at each prediction box step + precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) + recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) + + # Pad with start and end values to simplify the math + precisions = np.concatenate([[0], precisions, [0]]) + recalls = np.concatenate([[0], recalls, [1]]) + + # Ensure precision values decrease but don't increase. This way, the + # precision value at each recall threshold is the maximum it can be + # for all following recall thresholds, as specified by the VOC paper. + for i in range(len(precisions) - 2, -1, -1): + precisions[i] = np.maximum(precisions[i], precisions[i + 1]) + + # Compute mean AP over recall range + indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 + mAP = np.sum((recalls[indices] - recalls[indices - 1]) * + precisions[indices]) + + return mAP, precisions, recalls, overlaps + + +def compute_ap_range(gt_box, gt_class_id, gt_mask, + pred_box, pred_class_id, pred_score, pred_mask, + iou_thresholds=None, verbose=1): + """Compute AP over a range or IoU thresholds. 
Default range is 0.5-0.95.""" + # Default is 0.5 to 0.95 with increments of 0.05 + iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) + + # Compute AP over range of IoU thresholds + AP = [] + for iou_threshold in iou_thresholds: + ap, precisions, recalls, overlaps =\ + compute_ap(gt_box, gt_class_id, gt_mask, + pred_box, pred_class_id, pred_score, pred_mask, + iou_threshold=iou_threshold) + if verbose: + print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) + AP.append(ap) + AP = np.array(AP).mean() + if verbose: + print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( + iou_thresholds[0], iou_thresholds[-1], AP)) + return AP + + +def compute_recall(pred_boxes, gt_boxes, iou): + """Compute the recall at the given IoU threshold. It's an indication + of how many GT boxes were found by the given prediction boxes. + pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates + gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates + """ + # Measure overlaps + overlaps = compute_overlaps(pred_boxes, gt_boxes) + iou_max = np.max(overlaps, axis=1) + iou_argmax = np.argmax(overlaps, axis=1) + positive_ids = np.where(iou_max >= iou)[0] + matched_gt_boxes = iou_argmax[positive_ids] + + recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] + return recall, positive_ids + + +# ## Batch Slicing +# Some custom layers support a batch size of 1 only, and require a lot of work +# to support batches greater than 1. This function slices an input tensor +# across the batch dimension and feeds batches of size 1. Effectively, +# an easy way to support batches > 1 quickly with little code modification. +# In the long run, it's more efficient to modify the code to support large +# batches and getting rid of this function. Consider this a temporary solution +def batch_slice(inputs, graph_fn, batch_size, names=None): + """Splits inputs into slices and feeds each slice to a copy of the given + computation graph and then combines the results. It allows you to run a + graph on a batch of inputs even if the graph is written to support one + instance only. + inputs: list of tensors. All must have the same first dimension length + graph_fn: A function that returns a TF tensor that's part of a graph. + batch_size: number of slices to divide the data into. + names: If provided, assigns names to the resulting tensors. + """ + if not isinstance(inputs, list): + inputs = [inputs] + + outputs = [] + for i in range(batch_size): + inputs_slice = [x[i] for x in inputs] + output_slice = graph_fn(*inputs_slice) + if not isinstance(output_slice, (tuple, list)): + output_slice = [output_slice] + outputs.append(output_slice) + # Change outputs from a list of slices where each is + # a list of outputs to a list of outputs and each has + # a list of slices + outputs = list(zip(*outputs)) + + if names is None: + names = [None] * len(outputs) + + result = [tf.stack(o, axis=0, name=n) + for o, n in zip(outputs, names)] + if len(result) == 1: + result = result[0] + + return result + + +def download_trained_weights(coco_model_path, verbose=1): + """Download COCO trained weights from Releases. + coco_model_path: local path of COCO trained weights + """ + if verbose > 0: + print("Downloading pretrained model to " + coco_model_path + " ...") + with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: + shutil.copyfileobj(resp, out) + if verbose > 0: + print("... done downloading pretrained model!") + + +def norm_boxes(boxes, shape): + """Converts boxes from pixel coordinates to normalized coordinates. 
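+    NumPy counterpart of norm_boxes_graph(): boxes are shifted by (0, 0, 1, 1)
+    and divided by (h - 1, w - 1, h - 1, w - 1).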
+ boxes: [N, (y1, x1, y2, x2)] in pixel coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. + Returns: + [N, (y1, x1, y2, x2)] in normalized coordinates + """ + h, w = shape + scale = np.array([h - 1, w - 1, h - 1, w - 1]) + shift = np.array([0, 0, 1, 1]) + return np.divide((boxes - shift), scale).astype(np.float32) + + +def denorm_boxes(boxes, shape): + """Converts boxes from normalized coordinates to pixel coordinates. + boxes: [N, (y1, x1, y2, x2)] in normalized coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. + Returns: + [N, (y1, x1, y2, x2)] in pixel coordinates + """ + h, w = shape + scale = np.array([h - 1, w - 1, h - 1, w - 1]) + shift = np.array([0, 0, 1, 1]) + return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) + + +def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, + preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None): + """A wrapper for Scikit-Image resize(). + Scikit-Image generates warnings on every call to resize() if it doesn't + receive the right parameters. The right parameters depend on the version + of skimage. This solves the problem by using different parameters per + version. And it provides a central place to control resizing defaults. + """ + if LooseVersion(skimage.__version__) >= LooseVersion("0.14"): + # New in 0.14: anti_aliasing. Default it to False for backward + # compatibility with skimage 0.13. + return skimage.transform.resize( + image, output_shape, + order=order, mode=mode, cval=cval, clip=clip, + preserve_range=preserve_range, anti_aliasing=anti_aliasing, + anti_aliasing_sigma=anti_aliasing_sigma) + else: + return skimage.transform.resize( + image, output_shape, + order=order, mode=mode, cval=cval, clip=clip, + preserve_range=preserve_range) \ No newline at end of file diff --git a/src/tracker/mask_rcnn/visualize.py b/src/tracker/mask_rcnn/visualize.py new file mode 100644 index 0000000000000000000000000000000000000000..10afc6d7b8955e65cc8969ef8bfb7ff5e769b8b0 --- /dev/null +++ b/src/tracker/mask_rcnn/visualize.py @@ -0,0 +1,617 @@ +""" +Mask R-CNN +Display and Visualization Functions. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +""" + +import os +import cv2 +import sys +import random +import itertools +import colorsys + +import numpy as np +from skimage.measure import find_contours +import matplotlib.pyplot as plt +from matplotlib import patches, lines +from matplotlib.patches import Polygon +from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas +import IPython.display + +# Root directory of the project +ROOT_DIR = os.path.abspath("../") + +# Import Mask RCNN +sys.path.append(ROOT_DIR) # To find local version of the library +from mrcnn import utils + + +############################################################ +# Visualization +############################################################ + +def display_images(images, titles=None, cols=4, cmap=None, norm=None, + interpolation=None): + """Display the given set of images, optionally with titles. + images: list or array of image tensors in HWC format. + titles: optional. A list of titles to display with each image. + cols: number of images per row + cmap: Optional. Color map to use. For example, "Blues". 
+ norm: Optional. A Normalize instance to map values to colors. + interpolation: Optional. Image interpolation to use for display. + """ + titles = titles if titles is not None else [""] * len(images) + rows = len(images) // cols + 1 + plt.figure(figsize=(14, 14 * rows // cols)) + i = 1 + for image, title in zip(images, titles): + plt.subplot(rows, cols, i) + plt.title(title, fontsize=9) + plt.axis('off') + plt.imshow(image.astype(np.uint8), cmap=cmap, + norm=norm, interpolation=interpolation) + i += 1 + plt.show() + + +def random_colors(N, bright=True): + """ + Generate random colors. + To get visually distinct colors, generate them in HSV space then + convert to RGB. + """ + brightness = 1.0 if bright else 0.7 + hsv = [(i / N, 1, brightness) for i in range(N)] + colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) + random.shuffle(colors) + return colors + + +def apply_mask(image, mask, color, alpha=0.5): + """Apply the given mask to the image. + """ + for c in range(3): + image[:, :, c] = np.where(mask == 1, + image[:, :, c] * + (1 - alpha) + alpha * color[c] * 255, + image[:, :, c]) + return image + + +def display_instances(image, boxes, masks, class_ids, class_names, + scores=None, title="", + figsize=(16, 16), ax=None, + show_mask=True, show_bbox=True, + colors=None, captions=None): + """ + boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. + masks: [height, width, num_instances] + class_ids: [num_instances] + class_names: list of class names of the dataset + scores: (optional) confidence scores for each box + title: (optional) Figure title + show_mask, show_bbox: To show masks and bounding boxes or not + figsize: (optional) the size of the image + colors: (optional) An array or colors to use with each object + captions: (optional) A list of strings to use as captions for each object + """ + # Number of instances + N = boxes.shape[0] + if not N: + print("\n*** No instances to display *** \n") + else: + assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] + + # If no axis is passed, create one and automatically call show() + auto_show = False + if not ax: + _, ax = plt.subplots(1, figsize=figsize) + auto_show = True + + # Generate random colors + colors = colors or random_colors(N) + + # Show area outside image boundaries. + height, width = image.shape[:2] + ax.set_ylim(height + 10, -10) + ax.set_xlim(-10, width + 10) + ax.axis('off') + ax.set_title(title) + + masked_image = image.astype(np.uint32).copy() + for i in range(N): + color = colors[i] + + # Bounding box + if not np.any(boxes[i]): + # Skip this instance. Has no bbox. Likely lost in image cropping. + continue + y1, x1, y2, x2 = boxes[i] + if show_bbox: + p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, + alpha=0.7, linestyle="dashed", + edgecolor=color, facecolor='none') + ax.add_patch(p) + + # Label + if not captions: + class_id = class_ids[i] + score = scores[i] if scores is not None else None + label = class_names[class_id] + caption = "{} {:.3f}".format(label, score) if score else label + else: + caption = captions[i] + ax.text(x1, y1 + 8, caption, + color='w', size=11, backgroundcolor="none") + + # Mask + mask = masks[:, :, i] + if show_mask: + masked_image = apply_mask(masked_image, mask, color) + + # Mask Polygon + # Pad to ensure proper polygons for masks that touch image edges. 
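+        # find_contours() only closes a contour where the mask drops to zero,
+        # so a one-pixel zero border is added around the mask first.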
+ padded_mask = np.zeros( + (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) + padded_mask[1:-1, 1:-1] = mask + contours = find_contours(padded_mask, 0.5) + for verts in contours: + # Subtract the padding and flip (y, x) to (x, y) + verts = np.fliplr(verts) - 1 + p = Polygon(verts, facecolor="none", edgecolor=color) + ax.add_patch(p) + ax.imshow(masked_image.astype(np.uint8)) + if auto_show: + plt.show() + + +def display_instances_(image, boxes, masks, class_ids, class_names, + scores=None, title="", + figsize=(16, 16), ax=None, + show_mask=True, show_bbox=True, + colors=None, captions=None, + making_video=False, making_image=False, detect=False, hc=False, real_time=False): + """ + boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. + masks: [height, width, num_instances] + class_ids: [num_instances] + class_names: list of class names of the dataset + scores: (optional) confidence scores for each box + title: (optional) Figure title + show_mask, show_bbox: To show masks and bounding boxes or not + figsize: (optional) the size of the image + colors: (optional) An array or colors to use with each object + captions: (optional) A list of strings to use as captions for each object + """ + # Number of instances + N = boxes.shape[0] + if not N: + print("\n*** No instances to display *** \n") + else: + assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] + + # If no axis is passed, create one and automatically call show() + auto_show = True + if not ax: + fig, ax = plt.subplots(1, figsize=figsize) + canvas = FigureCanvas(fig) + + # Generate random colors + if not making_video or not real_time: + colors = colors or random_colors(N) + # Show area outside image boundaries. + height, width = image.shape[:2] + ax.set_ylim(height + 1, -1) + ax.set_xlim(-1, width + 1) + ax.axis('off') + ax.set_title(title) + + masked_image = image.astype(np.uint32).copy() + for i in range(N): + class_id = class_ids[i] + if making_video or real_time: + # you can also assign a specific color for each class. etc: + # if class_id == 1: + # color = colors[0] + # elif class_id == 2: + # color = colors[1] + color = colors[class_id-1] + elif hc: + #just for hard-code the mask for paper + if class_id == 14: + color = colors[0] + else: + color = colors[class_id] + else: + color = colors[i] + + # Bounding box + if not np.any(boxes[i]): + # Skip this instance. Has no bbox. Likely lost in image cropping. + continue + y1, x1, y2, x2 = boxes[i] + if show_bbox: + p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, + alpha=0.7, linestyle="dashed", + edgecolor=color, facecolor='none') + ax.add_patch(p) + + # Label + if not captions: + score = scores[i] if scores is not None else None + label = class_names[class_id] + x = random.randint(x1, (x1 + x2) // 2) + caption = "{} {:.3f}".format(label, score) if score else label + else: + caption = captions[i] + ax.text(x1, y1 + 8, caption, + color='w', size=14, backgroundcolor="none") + + # Mask + mask = masks[:, :, i] + if show_mask: + masked_image = apply_mask(masked_image, mask, color) + + # Mask Polygon + # Pad to ensure proper polygons for masks that touch image edges. 
+ padded_mask = np.zeros( + (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) + padded_mask[1:-1, 1:-1] = mask + contours = find_contours(padded_mask, 0.5) + for verts in contours: + # Subtract the padding and flip (y, x) to (x, y) + verts = np.fliplr(verts) - 1 + p = Polygon(verts, facecolor="none", edgecolor=color) + ax.add_patch(p) + ax.imshow(masked_image.astype(np.uint8)) + if detect: + plt.close() + return canvas + # To transform the drawn figure into ndarray X + fig.canvas.draw() + X = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') + X = X.reshape(fig.canvas.get_width_height()[::-1] + (3,)) + # open cv's RGB style: BGR + if not real_time: + X = X[..., ::-1] + if making_video or real_time: + plt.close() + return X + elif making_image: + cv2.imwrite('splash.png', X) + if auto_show: + plt.show() + + +def display_differences(image, + gt_box, gt_class_id, gt_mask, + pred_box, pred_class_id, pred_score, pred_mask, + class_names, title="", ax=None, + show_mask=True, show_box=True, + iou_threshold=0.5, score_threshold=0.5): + """Display ground truth and prediction instances on the same image.""" + # Match predictions to ground truth + gt_match, pred_match, overlaps = utils.compute_matches( + gt_box, gt_class_id, gt_mask, + pred_box, pred_class_id, pred_score, pred_mask, + iou_threshold=iou_threshold, score_threshold=score_threshold) + # Ground truth = green. Predictions = red + colors = [(0, 1, 0, .8)] * len(gt_match)\ + + [(1, 0, 0, 1)] * len(pred_match) + # Concatenate GT and predictions + class_ids = np.concatenate([gt_class_id, pred_class_id]) + scores = np.concatenate([np.zeros([len(gt_match)]), pred_score]) + boxes = np.concatenate([gt_box, pred_box]) + masks = np.concatenate([gt_mask, pred_mask], axis=-1) + # Captions per instance show score/IoU + captions = ["" for m in gt_match] + ["{:.2f} / {:.2f}".format( + pred_score[i], + (overlaps[i, int(pred_match[i])] + if pred_match[i] > -1 else overlaps[i].max())) + for i in range(len(pred_match))] + # Set title if not provided + title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU" + # Display + display_instances( + image, + boxes, masks, class_ids, + class_names, scores, ax=ax, + show_bbox=show_box, show_mask=show_mask, + colors=colors, captions=captions, + title=title) + + +def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): + """ + anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. + proposals: [n, 4] the same anchors but refined to fit objects better. + """ + masked_image = image.copy() + + # Pick random anchors in case there are too many. + ids = np.arange(rois.shape[0], dtype=np.int32) + ids = np.random.choice( + ids, limit, replace=False) if ids.shape[0] > limit else ids + + fig, ax = plt.subplots(1, figsize=(12, 12)) + if rois.shape[0] > limit: + plt.title("Showing {} random ROIs out of {}".format( + len(ids), rois.shape[0])) + else: + plt.title("{} ROIs".format(len(ids))) + + # Show area outside image boundaries. 
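+    # The axis limits below leave a margin (20 px vertically, 50/20 px
+    # horizontally) so ROIs that extend past the image edges stay visible.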
+ ax.set_ylim(image.shape[0] + 20, -20) + ax.set_xlim(-50, image.shape[1] + 20) + ax.axis('off') + + for i, id in enumerate(ids): + color = np.random.rand(3) + class_id = class_ids[id] + # ROI + y1, x1, y2, x2 = rois[id] + p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, + edgecolor=color if class_id else "gray", + facecolor='none', linestyle="dashed") + ax.add_patch(p) + # Refined ROI + if class_id: + ry1, rx1, ry2, rx2 = refined_rois[id] + p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, + edgecolor=color, facecolor='none') + ax.add_patch(p) + # Connect the top-left corners of the anchor and proposal for easy visualization + ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) + + # Label + label = class_names[class_id] + ax.text(rx1, ry1 + 8, "{}".format(label), + color='w', size=11, backgroundcolor="none") + + # Mask + m = utils.unmold_mask(mask[id], rois[id] + [:4].astype(np.int32), image.shape) + masked_image = apply_mask(masked_image, m, color) + + ax.imshow(masked_image) + + # Print stats + print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) + print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) + print("Positive Ratio: {:.2f}".format( + class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) + + +# TODO: Replace with matplotlib equivalent? +def draw_box(image, box, color): + """Draw 3-pixel width bounding boxes on the given image array. + color: list of 3 int values for RGB. + """ + y1, x1, y2, x2 = box + image[y1:y1 + 2, x1:x2] = color + image[y2:y2 + 2, x1:x2] = color + image[y1:y2, x1:x1 + 2] = color + image[y1:y2, x2:x2 + 2] = color + return image + + +def display_top_masks(image, mask, class_ids, class_names, limit=4): + """Display the given image and the top few class masks.""" + to_display = [] + titles = [] + to_display.append(image) + titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) + # Pick top prominent classes in this image + unique_class_ids = np.unique(class_ids) + mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) + for i in unique_class_ids] + top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), + key=lambda r: r[1], reverse=True) if v[1] > 0] + # Generate images and titles + for i in range(limit): + class_id = top_ids[i] if i < len(top_ids) else -1 + # Pull masks of instances belonging to the same class. + m = mask[:, :, np.where(class_ids == class_id)[0]] + m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) + to_display.append(m) + titles.append(class_names[class_id] if class_id != -1 else "-") + display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") + + +def plot_precision_recall(AP, precisions, recalls): + """Draw the precision-recall curve. + AP: Average precision at IoU >= 0.5 + precisions: list of precision values + recalls: list of recall values + """ + # Plot the Precision-Recall curve + _, ax = plt.subplots(1) + ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP)) + ax.set_ylim(0, 1.1) + ax.set_xlim(0, 1.1) + _ = ax.plot(recalls, precisions) + + +def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, + overlaps, class_names, threshold=0.5): + """Draw a grid showing how ground truth objects are classified. + gt_class_ids: [N] int. Ground truth class IDs + pred_class_id: [N] int. Predicted class IDs + pred_scores: [N] float. The probability scores of predicted classes + overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. + class_names: list of all class names in the dataset + threshold: Float. 
The prediction probability required to predict a class + """ + gt_class_ids = gt_class_ids[gt_class_ids != 0] + pred_class_ids = pred_class_ids[pred_class_ids != 0] + + plt.figure(figsize=(12, 10)) + plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) + plt.yticks(np.arange(len(pred_class_ids)), + ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) + for i, id in enumerate(pred_class_ids)]) + plt.xticks(np.arange(len(gt_class_ids)), + [class_names[int(id)] for id in gt_class_ids], rotation=90) + + thresh = overlaps.max() / 2. + for i, j in itertools.product(range(overlaps.shape[0]), + range(overlaps.shape[1])): + text = "" + if overlaps[i, j] > threshold: + text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" + color = ("white" if overlaps[i, j] > thresh + else "black" if overlaps[i, j] > 0 + else "grey") + plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), + horizontalalignment="center", verticalalignment="center", + fontsize=9, color=color) + + plt.tight_layout() + plt.xlabel("Ground Truth") + plt.ylabel("Predictions") + + +def draw_boxes(image, boxes=None, refined_boxes=None, + masks=None, captions=None, visibilities=None, + title="", ax=None): + """Draw bounding boxes and segmentation masks with different + customizations. + boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. + refined_boxes: Like boxes, but draw with solid lines to show + that they're the result of refining 'boxes'. + masks: [N, height, width] + captions: List of N titles to display on each box + visibilities: (optional) List of values of 0, 1, or 2. Determine how + prominent each bounding box should be. + title: An optional title to show over the image + ax: (optional) Matplotlib axis to draw on. + """ + # Number of boxes + assert boxes is not None or refined_boxes is not None + N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] + + # Matplotlib Axis + if not ax: + _, ax = plt.subplots(1, figsize=(12, 12)) + + # Generate random colors + colors = random_colors(N) + + # Show area outside image boundaries. + margin = image.shape[0] // 10 + ax.set_ylim(image.shape[0] + margin, -margin) + ax.set_xlim(-margin, image.shape[1] + margin) + ax.axis('off') + + ax.set_title(title) + + masked_image = image.astype(np.uint32).copy() + for i in range(N): + # Box visibility + visibility = visibilities[i] if visibilities is not None else 1 + if visibility == 0: + color = "gray" + style = "dotted" + alpha = 0.5 + elif visibility == 1: + color = colors[i] + style = "dotted" + alpha = 1 + elif visibility == 2: + color = colors[i] + style = "solid" + alpha = 1 + + # Boxes + if boxes is not None: + if not np.any(boxes[i]): + # Skip this instance. Has no bbox. Likely lost in cropping. 
+                continue
+            y1, x1, y2, x2 = boxes[i]
+            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
+                                  alpha=alpha, linestyle=style,
+                                  edgecolor=color, facecolor='none')
+            ax.add_patch(p)
+
+        # Refined boxes
+        if refined_boxes is not None and visibility > 0:
+            ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
+            p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
+                                  edgecolor=color, facecolor='none')
+            ax.add_patch(p)
+            # Connect the top-left corners of the anchor and proposal
+            if boxes is not None:
+                ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
+
+        # Captions
+        if captions is not None:
+            caption = captions[i]
+            # If there are refined boxes, display captions on them
+            if refined_boxes is not None:
+                y1, x1, y2, x2 = ry1, rx1, ry2, rx2
+            ax.text(x1, y1, caption, size=11, verticalalignment='top',
+                    color='w', backgroundcolor="none",
+                    bbox={'facecolor': color, 'alpha': 0.5,
+                          'pad': 2, 'edgecolor': 'none'})
+
+        # Masks
+        if masks is not None:
+            mask = masks[:, :, i]
+            masked_image = apply_mask(masked_image, mask, color)
+            # Mask Polygon
+            # Pad to ensure proper polygons for masks that touch image edges.
+            padded_mask = np.zeros(
+                (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
+            padded_mask[1:-1, 1:-1] = mask
+            contours = find_contours(padded_mask, 0.5)
+            for verts in contours:
+                # Subtract the padding and flip (y, x) to (x, y)
+                verts = np.fliplr(verts) - 1
+                p = Polygon(verts, facecolor="none", edgecolor=color)
+                ax.add_patch(p)
+    ax.imshow(masked_image.astype(np.uint8))
+
+
+def display_table(table):
+    """Display values in a table format.
+    table: an iterable of rows, and each row is an iterable of values.
+    """
+    html = ""
+    for row in table:
+        row_html = ""
+        for col in row:
+            row_html += "<td>{:40}</td>".format(str(col))
+        html += "<tr>" + row_html + "</tr>"
+    html = "<table>" + html + "</table>"
+    IPython.display.display(IPython.display.HTML(html))
+
+
+def display_weight_stats(model):
+    """Scans all the weights in the model and returns a list of tuples
+    that contain stats about each weight.
+    """
+    layers = model.get_trainable_layers()
+    table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
+    for l in layers:
+        weight_values = l.get_weights()  # list of Numpy arrays
+        weight_tensors = l.weights  # list of TF tensors
+        for i, w in enumerate(weight_values):
+            weight_name = weight_tensors[i].name
+            # Detect problematic layers. Exclude biases of conv layers.
+            alert = ""
+            if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1):
+                alert += "<span style='color:red'>*** dead?</span>"
+            if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
+                alert += "<span style='color:red'>*** Overflow?</span>"
+            # Add row
+            table.append([
+                weight_name + alert,
+                str(w.shape),
+                "{:+9.4f}".format(w.min()),
+                "{:+10.4f}".format(w.max()),
+                "{:+9.4f}".format(w.std()),
+            ])
+    display_table(table)
\ No newline at end of file
diff --git a/src/tracker/mrcnn/__pycache__/config.cpython-38.pyc b/src/tracker/mrcnn/__pycache__/config.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..691055b7198466f3d47ca27ac24fdc853a4120ac
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/config.cpython-38.pyc differ
diff --git a/src/tracker/mrcnn/__pycache__/config.cpython-39.pyc b/src/tracker/mrcnn/__pycache__/config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9b07f9d69b3d3cd60634202fcfb5fe81961d43a3
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/config.cpython-39.pyc differ
diff --git a/src/tracker/mrcnn/__pycache__/model.cpython-38.pyc b/src/tracker/mrcnn/__pycache__/model.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..da4c3bfdcabddff2da43787dd6581edde14f8799
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/model.cpython-38.pyc differ
diff --git a/src/tracker/mrcnn/__pycache__/model.cpython-39.pyc b/src/tracker/mrcnn/__pycache__/model.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4ce9e4395a66c1616366778cb8e74327b8710532
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/model.cpython-39.pyc differ
diff --git a/src/tracker/mrcnn/__pycache__/mrcnn_color.cpython-38.pyc b/src/tracker/mrcnn/__pycache__/mrcnn_color.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..55c9d0fb888de60766c11551a09e89a4acf57c3b
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/mrcnn_color.cpython-38.pyc differ
diff --git a/src/tracker/mrcnn/__pycache__/mrcnn_color.cpython-39.pyc b/src/tracker/mrcnn/__pycache__/mrcnn_color.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1db5cb74af7120c2f8c04d5f5e8a8a210b710ac9
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/mrcnn_color.cpython-39.pyc differ
diff --git a/src/tracker/mrcnn/__pycache__/utils.cpython-38.pyc b/src/tracker/mrcnn/__pycache__/utils.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..baf98053bfcbaa3846e3e3d5fbd056484a21f2fc
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/utils.cpython-38.pyc differ
diff --git a/src/tracker/mrcnn/__pycache__/utils.cpython-39.pyc b/src/tracker/mrcnn/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..de19de0e8e619a4b99af33351bda3bb82c5f3607
Binary files /dev/null and b/src/tracker/mrcnn/__pycache__/utils.cpython-39.pyc differ
diff --git 
a/src/tracker/mrcnn/config.py b/src/tracker/mrcnn/config.py new file mode 100644 index 0000000000000000000000000000000000000000..b652b56ff69c7ebea135d6ed2b24464ecc0cbdc4 --- /dev/null +++ b/src/tracker/mrcnn/config.py @@ -0,0 +1,235 @@ +""" +Mask R-CNN +Base Configurations class. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +""" + +import numpy as np + + +# Base Configuration Class +# Don't use this class directly. Instead, sub-class it and override +# the configurations you need to change. + +class Config(object): + """Base configuration class. For custom configurations, create a + sub-class that inherits from this one and override properties + that need to be changed. + """ + # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. + # Useful if your code needs to do things differently depending on which + # experiment is running. + NAME = None # Override in sub-classes + + # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1. + GPU_COUNT = 1 + + # Number of images to train with on each GPU. A 12GB GPU can typically + # handle 2 images of 1024x1024px. + # Adjust based on your GPU memory and image sizes. Use the highest + # number that your GPU can handle for best performance. + IMAGES_PER_GPU = 2 + + # Number of training steps per epoch + # This doesn't need to match the size of the training set. Tensorboard + # updates are saved at the end of each epoch, so setting this to a + # smaller number means getting more frequent TensorBoard updates. + # Validation stats are also calculated at each epoch end and they + # might take a while, so don't set this too small to avoid spending + # a lot of time on validation stats. + STEPS_PER_EPOCH = 1000 + + # Number of validation steps to run at the end of every training epoch. + # A bigger number improves accuracy of validation stats, but slows + # down the training. + VALIDATION_STEPS = 50 + + # Backbone network architecture + # Supported values are: resnet50, resnet101. + # You can also provide a callable that should have the signature + # of model.resnet_graph. If you do so, you need to supply a callable + # to COMPUTE_BACKBONE_SHAPE as well + BACKBONE = "resnet101" + + # Only useful if you supply a callable to BACKBONE. Should compute + # the shape of each layer of the FPN Pyramid. + # See model.compute_backbone_shapes + COMPUTE_BACKBONE_SHAPE = None + + # The strides of each layer of the FPN Pyramid. These values + # are based on a Resnet101 backbone. + BACKBONE_STRIDES = [4, 8, 16, 32, 64] + + # Size of the fully-connected layers in the classification graph + FPN_CLASSIF_FC_LAYERS_SIZE = 1024 + + # Size of the top-down layers used to build the feature pyramid + TOP_DOWN_PYRAMID_SIZE = 256 + + # Number of classification classes (including background) + NUM_CLASSES = 1 # Override in sub-classes + + # Length of square anchor side in pixels + RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) + + # Ratios of anchors at each cell (width/height) + # A value of 1 represents a square anchor, and 0.5 is a wide anchor + RPN_ANCHOR_RATIOS = [0.5, 1, 2] + + # Anchor stride + # If 1 then anchors are created for each cell in the backbone feature map. + # If 2, then anchors are created for every other cell, and so on. + RPN_ANCHOR_STRIDE = 1 + + # Non-max suppression threshold to filter RPN proposals. + # You can increase this during training to generate more propsals. 
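+    # (Proposals kept after this NMS are what DetectionTargetLayer samples from
+    # during training, so a higher threshold keeps more, but more overlapping,
+    # ROIs.)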
+ RPN_NMS_THRESHOLD = 0.7 + + # How many anchors per image to use for RPN training + RPN_TRAIN_ANCHORS_PER_IMAGE = 256 + + # ROIs kept after tf.nn.top_k and before non-maximum suppression + PRE_NMS_LIMIT = 6000 + + # ROIs kept after non-maximum suppression (training and inference) + POST_NMS_ROIS_TRAINING = 2000 + POST_NMS_ROIS_INFERENCE = 1000 + + # If enabled, resizes instance masks to a smaller size to reduce + # memory load. Recommended when using high-resolution images. + USE_MINI_MASK = True + MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask + + # Input image resizing + # Generally, use the "square" resizing mode for training and predicting + # and it should work well in most cases. In this mode, images are scaled + # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the + # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is + # padded with zeros to make it a square so multiple images can be put + # in one batch. + # Available resizing modes: + # none: No resizing or padding. Return the image unchanged. + # square: Resize and pad with zeros to get a square image + # of size [max_dim, max_dim]. + # pad64: Pads width and height with zeros to make them multiples of 64. + # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales + # up before padding. IMAGE_MAX_DIM is ignored in this mode. + # The multiple of 64 is needed to ensure smooth scaling of feature + # maps up and down the 6 levels of the FPN pyramid (2**6=64). + # crop: Picks random crops from the image. First, scales the image based + # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of + # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. + # IMAGE_MAX_DIM is not used in this mode. + IMAGE_RESIZE_MODE = "square" + IMAGE_MIN_DIM = 800 + IMAGE_MAX_DIM = 1024 + # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further + # up scaling. For example, if set to 2 then images are scaled up to double + # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. + # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. + IMAGE_MIN_SCALE = 0 + # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4 + # Changing this requires other changes in the code. See the WIKI for more + # details: https://github.com/matterport/Mask_RCNN/wiki + IMAGE_CHANNEL_COUNT = 3 + + # Image mean (RGB) + MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) + + # Number of ROIs per image to feed to classifier/mask heads + # The Mask RCNN paper uses 512 but often the RPN doesn't generate + # enough positive proposals to fill this and keep a positive:negative + # ratio of 1:3. You can increase the number of proposals by adjusting + # the RPN NMS threshold. + TRAIN_ROIS_PER_IMAGE = 200 + + # Percent of positive ROIs used to train classifier/mask heads + ROI_POSITIVE_RATIO = 0.33 + + # Pooled ROIs + POOL_SIZE = 7 + MASK_POOL_SIZE = 14 + + # Shape of output mask + # To change this you also need to change the neural network mask branch + MASK_SHAPE = [28, 28] + + # Maximum number of ground truth instances to use in one image + MAX_GT_INSTANCES = 100 + + # Bounding box refinement standard deviation for RPN and final detections. 
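+    # These scale the (dy, dx, log(dh), log(dw)) refinements: targets are divided
+    # by them when building training targets (detection_targets_graph) and
+    # predictions are multiplied by them before being applied (ProposalLayer,
+    # refine_detections_graph), keeping the regression outputs near unit scale.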
+ RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) + BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) + + # Max number of final detections + DETECTION_MAX_INSTANCES = 100 + + # Minimum probability value to accept a detected instance + # ROIs below this threshold are skipped + DETECTION_MIN_CONFIDENCE = 0.7 + + # Non-maximum suppression threshold for detection + DETECTION_NMS_THRESHOLD = 0.3 + + # Learning rate and momentum + # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes + # weights to explode. Likely due to differences in optimizer + # implementation. + LEARNING_RATE = 0.001 + LEARNING_MOMENTUM = 0.9 + + # Weight decay regularization + WEIGHT_DECAY = 0.0001 + + # Loss weights for more precise optimization. + # Can be used for R-CNN training setup. + LOSS_WEIGHTS = { + "rpn_class_loss": 1., + "rpn_bbox_loss": 1., + "mrcnn_class_loss": 1., + "mrcnn_bbox_loss": 1., + "mrcnn_mask_loss": 1. + } + + # Use RPN ROIs or externally generated ROIs for training + # Keep this True for most situations. Set to False if you want to train + # the head branches on ROI generated by code rather than the ROIs from + # the RPN. For example, to debug the classifier head without having to + # train the RPN. + USE_RPN_ROIS = True + + # Train or freeze batch normalization layers + # None: Train BN layers. This is the normal mode + # False: Freeze BN layers. Good when using a small batch size + # True: (don't use). Set layer in training mode even when predicting + TRAIN_BN = False # Defaulting to False since batch size is often small + + # Gradient norm clipping + GRADIENT_CLIP_NORM = 5.0 + + def __init__(self): + """Set values of computed attributes.""" + # Effective batch size + self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT + + # Input image size + if self.IMAGE_RESIZE_MODE == "crop": + self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, + self.IMAGE_CHANNEL_COUNT]) + else: + self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, + self.IMAGE_CHANNEL_COUNT]) + + # Image meta data length + # See compose_image_meta() for details + self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES + + def display(self): + """Display Configuration values.""" + print("\nConfigurations:") + for a in dir(self): + if not a.startswith("__") and not callable(getattr(self, a)): + print("{:30} {}".format(a, getattr(self, a))) + print("\n") \ No newline at end of file diff --git a/src/tracker/mrcnn/model.py b/src/tracker/mrcnn/model.py new file mode 100644 index 0000000000000000000000000000000000000000..0bcd9e08b41a3ac7dc859e5cfe6f3061467558c0 --- /dev/null +++ b/src/tracker/mrcnn/model.py @@ -0,0 +1,2804 @@ +""" +Mask R-CNN +The main Mask R-CNN model implementation. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +""" + +import os +import random +import datetime +import re +import math +import logging +from collections import OrderedDict +import multiprocessing +import numpy as np +import tensorflow as tf +import keras +import keras.backend as K +import keras.layers as KL +import keras.engine as KE +import keras.models as KM + +from src.tracker.mrcnn import utils + +# Requires TensorFlow 1.3+ and Keras 2.0.8+. 
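+# Note: the graph code below uses TF1-style ops (tf.random_shuffle, tf.to_float)
+# and Keras' KE.Layer, so in practice a 1.x TensorFlow with standalone Keras is
+# assumed despite the permissive version check that follows.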
+from distutils.version import LooseVersion +assert LooseVersion(tf.__version__) >= LooseVersion("1.3") +assert LooseVersion(keras.__version__) >= LooseVersion('2.0.8') + + +############################################################ +# Utility Functions +############################################################ + +def log(text, array=None): + """Prints a text message. And, optionally, if a Numpy array is provided it + prints it's shape, min, and max values. + """ + if array is not None: + text = text.ljust(25) + text += ("shape: {:20} ".format(str(array.shape))) + if array.size: + text += ("min: {:10.5f} max: {:10.5f}".format(array.min(),array.max())) + else: + text += ("min: {:10} max: {:10}".format("","")) + text += " {}".format(array.dtype) + print(text) + + +class BatchNorm(KL.BatchNormalization): + """Extends the Keras BatchNormalization class to allow a central place + to make changes if needed. + Batch normalization has a negative effect on training if batches are small + so this layer is often frozen (via setting in Config class) and functions + as linear layer. + """ + def call(self, inputs, training=None): + """ + Note about training values: + None: Train BN layers. This is the normal mode + False: Freeze BN layers. Good when batch size is small + True: (don't use). Set layer in training mode even when making inferences + """ + return super(self.__class__, self).call(inputs, training=training) + + +def compute_backbone_shapes(config, image_shape): + """Computes the width and height of each stage of the backbone network. + Returns: + [N, (height, width)]. Where N is the number of stages + """ + if callable(config.BACKBONE): + return config.COMPUTE_BACKBONE_SHAPE(image_shape) + + # Currently supports ResNet only + assert config.BACKBONE in ["resnet50", "resnet101"] + return np.array( + [[int(math.ceil(image_shape[0] / stride)), + int(math.ceil(image_shape[1] / stride))] + for stride in config.BACKBONE_STRIDES]) + + +############################################################ +# Resnet Graph +############################################################ + +# Code adopted from: +# https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py + +def identity_block(input_tensor, kernel_size, filters, stage, block, + use_bias=True, train_bn=True): + """The identity_block is the block that has no conv layer at shortcut + # Arguments + input_tensor: input tensor + kernel_size: default 3, the kernel size of middle conv layer at main path + filters: list of integers, the nb_filters of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + use_bias: Boolean. To use or not use a bias in conv layers. + train_bn: Boolean. 
Train or freeze Batch Norm layers + """ + nb_filter1, nb_filter2, nb_filter3 = filters + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', + use_bias=use_bias)(input_tensor) + x = BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', + name=conv_name_base + '2b', use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', + use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) + + x = KL.Add()([x, input_tensor]) + x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) + return x + + +def conv_block(input_tensor, kernel_size, filters, stage, block, + strides=(2, 2), use_bias=True, train_bn=True): + """conv_block is the block that has a conv layer at shortcut + # Arguments + input_tensor: input tensor + kernel_size: default 3, the kernel size of middle conv layer at main path + filters: list of integers, the nb_filters of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + use_bias: Boolean. To use or not use a bias in conv layers. + train_bn: Boolean. Train or freeze Batch Norm layers + Note that from stage 3, the first conv layer at main path is with subsample=(2,2) + And the shortcut should have subsample=(2,2) as well + """ + nb_filter1, nb_filter2, nb_filter3 = filters + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, + name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) + x = BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', + name=conv_name_base + '2b', use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + + '2c', use_bias=use_bias)(x) + x = BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) + + shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, + name=conv_name_base + '1', use_bias=use_bias)(input_tensor) + shortcut = BatchNorm(name=bn_name_base + '1')(shortcut, training=train_bn) + + x = KL.Add()([x, shortcut]) + x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) + return x + + +def resnet_graph(input_image, architecture, stage5=False, train_bn=True): + """Build a ResNet graph. + architecture: Can be resnet50 or resnet101 + stage5: Boolean. If False, stage5 of the network is not created + train_bn: Boolean. 
Train or freeze Batch Norm layers + """ + assert architecture in ["resnet50", "resnet101"] + # Stage 1 + x = KL.ZeroPadding2D((3, 3))(input_image) + x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x) + x = BatchNorm(name='bn_conv1')(x, training=train_bn) + x = KL.Activation('relu')(x) + C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) + # Stage 2 + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), train_bn=train_bn) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', train_bn=train_bn) + C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', train_bn=train_bn) + # Stage 3 + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', train_bn=train_bn) + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', train_bn=train_bn) + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', train_bn=train_bn) + C3 = x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', train_bn=train_bn) + # Stage 4 + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', train_bn=train_bn) + block_count = {"resnet50": 5, "resnet101": 22}[architecture] + for i in range(block_count): + x = identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i), train_bn=train_bn) + C4 = x + # Stage 5 + if stage5: + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', train_bn=train_bn) + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', train_bn=train_bn) + C5 = x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', train_bn=train_bn) + else: + C5 = None + return [C1, C2, C3, C4, C5] + + +############################################################ +# Proposal Layer +############################################################ + +def apply_box_deltas_graph(boxes, deltas): + """Applies the given deltas to the given boxes. + boxes: [N, (y1, x1, y2, x2)] boxes to update + deltas: [N, (dy, dx, log(dh), log(dw))] refinements to apply + """ + # Convert to y, x, h, w + height = boxes[:, 2] - boxes[:, 0] + width = boxes[:, 3] - boxes[:, 1] + center_y = boxes[:, 0] + 0.5 * height + center_x = boxes[:, 1] + 0.5 * width + # Apply deltas + center_y += deltas[:, 0] * height + center_x += deltas[:, 1] * width + height *= tf.exp(deltas[:, 2]) + width *= tf.exp(deltas[:, 3]) + # Convert back to y1, x1, y2, x2 + y1 = center_y - 0.5 * height + x1 = center_x - 0.5 * width + y2 = y1 + height + x2 = x1 + width + result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out") + return result + + +def clip_boxes_graph(boxes, window): + """ + boxes: [N, (y1, x1, y2, x2)] + window: [4] in the form y1, x1, y2, x2 + """ + # Split + wy1, wx1, wy2, wx2 = tf.split(window, 4) + y1, x1, y2, x2 = tf.split(boxes, 4, axis=1) + # Clip + y1 = tf.maximum(tf.minimum(y1, wy2), wy1) + x1 = tf.maximum(tf.minimum(x1, wx2), wx1) + y2 = tf.maximum(tf.minimum(y2, wy2), wy1) + x2 = tf.maximum(tf.minimum(x2, wx2), wx1) + clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes") + clipped.set_shape((clipped.shape[0], 4)) + return clipped + + +class ProposalLayer(KE.Layer): + """Receives anchor scores and selects a subset to pass as proposals + to the second stage. Filtering is done based on anchor scores and + non-max suppression to remove overlaps. It also applies bounding + box refinement deltas to anchors. 
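+    Processing order in call(): keep the top PRE_NMS_LIMIT anchors by foreground
+    score, apply the predicted deltas (scaled by RPN_BBOX_STD_DEV), clip to the
+    normalized [0, 1] window, run NMS at nms_threshold, then zero-pad up to
+    proposal_count. Illustrative use (tensor names are placeholders):
+    ProposalLayer(proposal_count=1000, nms_threshold=0.7,
+                  config=config)([rpn_probs, rpn_bbox, anchors])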
+ Inputs: + rpn_probs: [batch, num_anchors, (bg prob, fg prob)] + rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))] + anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates + Returns: + Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)] + """ + + def __init__(self, proposal_count, nms_threshold, config=None, **kwargs): + super(ProposalLayer, self).__init__(**kwargs) + self.config = config + self.proposal_count = proposal_count + self.nms_threshold = nms_threshold + + def call(self, inputs): + # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1] + scores = inputs[0][:, :, 1] + # Box deltas [batch, num_rois, 4] + deltas = inputs[1] + deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) + # Anchors + anchors = inputs[2] + + # Improve performance by trimming to top anchors by score + # and doing the rest on the smaller subset. + pre_nms_limit = tf.minimum(self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1]) + ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, + name="top_anchors").indices + scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y), + self.config.IMAGES_PER_GPU) + deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y), + self.config.IMAGES_PER_GPU) + pre_nms_anchors = utils.batch_slice([anchors, ix], lambda a, x: tf.gather(a, x), + self.config.IMAGES_PER_GPU, + names=["pre_nms_anchors"]) + + # Apply deltas to anchors to get refined anchors. + # [batch, N, (y1, x1, y2, x2)] + boxes = utils.batch_slice([pre_nms_anchors, deltas], + lambda x, y: apply_box_deltas_graph(x, y), + self.config.IMAGES_PER_GPU, + names=["refined_anchors"]) + + # Clip to image boundaries. Since we're in normalized coordinates, + # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)] + window = np.array([0, 0, 1, 1], dtype=np.float32) + boxes = utils.batch_slice(boxes, + lambda x: clip_boxes_graph(x, window), + self.config.IMAGES_PER_GPU, + names=["refined_anchors_clipped"]) + + # Filter out small boxes + # According to Xinlei Chen's paper, this reduces detection accuracy + # for small objects, so we're skipping it. + + # Non-max suppression + def nms(boxes, scores): + indices = tf.image.non_max_suppression( + boxes, scores, self.proposal_count, + self.nms_threshold, name="rpn_non_max_suppression") + proposals = tf.gather(boxes, indices) + # Pad if needed + padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0) + proposals = tf.pad(proposals, [(0, padding), (0, 0)]) + return proposals + proposals = utils.batch_slice([boxes, scores], nms, + self.config.IMAGES_PER_GPU) + return proposals + + def compute_output_shape(self, input_shape): + return (None, self.proposal_count, 4) + + +############################################################ +# ROIAlign Layer +############################################################ + +def log2_graph(x): + """Implementation of Log2. TF doesn't have a native implementation.""" + return tf.math.log(x) / tf.math.log(2.0) + + +class PyramidROIAlign(KE.Layer): + """Implements ROI Pooling on multiple levels of the feature pyramid. + Params: + - pool_shape: [pool_height, pool_width] of the output pooled regions. Usually [7, 7] + Inputs: + - boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized + coordinates. Possibly padded with zeros if not enough + boxes to fill the array. + - image_meta: [batch, (meta data)] Image details. See compose_image_meta() + - feature_maps: List of feature maps from different levels of the pyramid. 
+ Each is [batch, height, width, channels] + Output: + Pooled regions in the shape: [batch, num_boxes, pool_height, pool_width, channels]. + The width and height are those specific in the pool_shape in the layer + constructor. + """ + + def __init__(self, pool_shape, **kwargs): + super(PyramidROIAlign, self).__init__(**kwargs) + self.pool_shape = tuple(pool_shape) + + def call(self, inputs): + # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords + boxes = inputs[0] + + # Image meta + # Holds details about the image. See compose_image_meta() + image_meta = inputs[1] + + # Feature Maps. List of feature maps from different level of the + # feature pyramid. Each is [batch, height, width, channels] + feature_maps = inputs[2:] + + # Assign each ROI to a level in the pyramid based on the ROI area. + y1, x1, y2, x2 = tf.split(boxes, 4, axis=2) + h = y2 - y1 + w = x2 - x1 + # Use shape of first image. Images in a batch must have the same size. + image_shape = parse_image_meta_graph(image_meta)['image_shape'][0] + # Equation 1 in the Feature Pyramid Networks paper. Account for + # the fact that our coordinates are normalized here. + # e.g. a 224x224 ROI (in pixels) maps to P4 + image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32) + roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area))) + roi_level = tf.minimum(5, tf.maximum( + 2, 4 + tf.cast(tf.round(roi_level), tf.int32))) + roi_level = tf.squeeze(roi_level, 2) + + # Loop through levels and apply ROI pooling to each. P2 to P5. + pooled = [] + box_to_level = [] + for i, level in enumerate(range(2, 6)): + ix = tf.where(tf.equal(roi_level, level)) + level_boxes = tf.gather_nd(boxes, ix) + + # Box indices for crop_and_resize. + box_indices = tf.cast(ix[:, 0], tf.int32) + + # Keep track of which box is mapped to which level + box_to_level.append(ix) + + # Stop gradient propogation to ROI proposals + level_boxes = tf.stop_gradient(level_boxes) + box_indices = tf.stop_gradient(box_indices) + + # Crop and Resize + # From Mask R-CNN paper: "We sample four regular locations, so + # that we can evaluate either max or average pooling. In fact, + # interpolating only a single value at each bin center (without + # pooling) is nearly as effective." + # + # Here we use the simplified approach of a single value per bin, + # which is how it's done in tf.crop_and_resize() + # Result: [batch * num_boxes, pool_height, pool_width, channels] + pooled.append(tf.image.crop_and_resize( + feature_maps[i], level_boxes, box_indices, self.pool_shape, + method="bilinear")) + + # Pack pooled features into one tensor + pooled = tf.concat(pooled, axis=0) + + # Pack box_to_level mapping into one array and add another + # column representing the order of pooled boxes + box_to_level = tf.concat(box_to_level, axis=0) + box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1) + box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range], + axis=1) + + # Rearrange pooled features to match the order of the original boxes + # Sort box_to_level by batch then box index + # TF doesn't have a way to sort by two columns, so merge them and sort. 
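+        # The batch index is multiplied by a large constant (100000) and added to
+        # the box index to form a single sortable key (valid while each image has
+        # fewer than 100000 boxes); tf.nn.top_k returns indices in descending key
+        # order, and [::-1] flips them back to ascending (batch, box) order.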
+ sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1] + ix = tf.nn.top_k(sorting_tensor, k=tf.shape( + box_to_level)[0]).indices[::-1] + ix = tf.gather(box_to_level[:, 2], ix) + pooled = tf.gather(pooled, ix) + + # Re-add the batch dimension + shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0) + pooled = tf.reshape(pooled, shape) + return pooled + + def compute_output_shape(self, input_shape): + return input_shape[0][:2] + self.pool_shape + (input_shape[2][-1], ) + + +############################################################ +# Detection Target Layer +############################################################ + +def overlaps_graph(boxes1, boxes2): + """Computes IoU overlaps between two sets of boxes. + boxes1, boxes2: [N, (y1, x1, y2, x2)]. + """ + # 1. Tile boxes2 and repeat boxes1. This allows us to compare + # every boxes1 against every boxes2 without loops. + # TF doesn't have an equivalent to np.repeat() so simulate it + # using tf.tile() and tf.reshape. + b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), + [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) + b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) + # 2. Compute intersections + b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1) + b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1) + y1 = tf.maximum(b1_y1, b2_y1) + x1 = tf.maximum(b1_x1, b2_x1) + y2 = tf.minimum(b1_y2, b2_y2) + x2 = tf.minimum(b1_x2, b2_x2) + intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0) + # 3. Compute unions + b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1) + b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1) + union = b1_area + b2_area - intersection + # 4. Compute IoU and reshape to [boxes1, boxes2] + iou = intersection / union + overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) + return overlaps + + +def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config): + """Generates detection targets for one image. Subsamples proposals and + generates target class IDs, bounding box deltas, and masks for each. + Inputs: + proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates. Might + be zero padded if there are not enough proposals. + gt_class_ids: [MAX_GT_INSTANCES] int class IDs + gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. + gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type. + Returns: Target ROIs and corresponding class IDs, bounding box shifts, + and masks. + rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates + class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded. + deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))] + masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox + boundaries and resized to neural network output size. + Note: Returned arrays might be zero padded if not enough target ROIs. + """ + # Assertions + asserts = [ + tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], + name="roi_assertion"), + ] + with tf.control_dependencies(asserts): + proposals = tf.identity(proposals) + + # Remove zero padding + proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") + gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") + gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, + name="trim_gt_class_ids") + gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2, + name="trim_gt_masks") + + # Handle COCO crowds + # A crowd box in COCO is a bounding box around several instances. Exclude + # them from training. 
A crowd box is given a negative class ID. + crowd_ix = tf.where(gt_class_ids < 0)[:, 0] + non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0] + crowd_boxes = tf.gather(gt_boxes, crowd_ix) + gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) + gt_boxes = tf.gather(gt_boxes, non_crowd_ix) + gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2) + + # Compute overlaps matrix [proposals, gt_boxes] + overlaps = overlaps_graph(proposals, gt_boxes) + + # Compute overlaps with crowd boxes [proposals, crowd_boxes] + crowd_overlaps = overlaps_graph(proposals, crowd_boxes) + crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) + no_crowd_bool = (crowd_iou_max < 0.001) + + # Determine positive and negative ROIs + roi_iou_max = tf.reduce_max(overlaps, axis=1) + # 1. Positive ROIs are those with >= 0.5 IoU with a GT box + positive_roi_bool = (roi_iou_max >= 0.5) + positive_indices = tf.where(positive_roi_bool)[:, 0] + # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds. + negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0] + + # Subsample ROIs. Aim for 33% positive + # Positive ROIs + positive_count = int(config.TRAIN_ROIS_PER_IMAGE * + config.ROI_POSITIVE_RATIO) + positive_indices = tf.random_shuffle(positive_indices)[:positive_count] + positive_count = tf.shape(positive_indices)[0] + # Negative ROIs. Add enough to maintain positive:negative ratio. + r = 1.0 / config.ROI_POSITIVE_RATIO + negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count + negative_indices = tf.random_shuffle(negative_indices)[:negative_count] + # Gather selected ROIs + positive_rois = tf.gather(proposals, positive_indices) + negative_rois = tf.gather(proposals, negative_indices) + + # Assign positive ROIs to GT boxes. + positive_overlaps = tf.gather(overlaps, positive_indices) + roi_gt_box_assignment = tf.cond( + tf.greater(tf.shape(positive_overlaps)[1], 0), + true_fn = lambda: tf.argmax(positive_overlaps, axis=1), + false_fn = lambda: tf.cast(tf.constant([]),tf.int64) + ) + roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) + roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) + + # Compute bbox refinement for positive ROIs + deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes) + deltas /= config.BBOX_STD_DEV + + # Assign positive ROIs to GT masks + # Permute masks to [N, height, width, 1] + transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1) + # Pick the right mask for each ROI + roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment) + + # Compute mask targets + boxes = positive_rois + if config.USE_MINI_MASK: + # Transform ROI coordinates from normalized image space + # to normalized mini-mask space. + y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1) + gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1) + gt_h = gt_y2 - gt_y1 + gt_w = gt_x2 - gt_x1 + y1 = (y1 - gt_y1) / gt_h + x1 = (x1 - gt_x1) / gt_w + y2 = (y2 - gt_y1) / gt_h + x2 = (x2 - gt_x1) / gt_w + boxes = tf.concat([y1, x1, y2, x2], 1) + box_ids = tf.range(0, tf.shape(roi_masks)[0]) + masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes, + box_ids, + config.MASK_SHAPE) + # Remove the extra dimension from masks. + masks = tf.squeeze(masks, axis=3) + + # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with + # binary cross entropy loss. + masks = tf.round(masks) + + # Append negative ROIs and pad bbox deltas and masks that + # are not used for negative ROIs with zeros. 
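+    # Below, N is the number of negative ROIs and P is the padding still needed
+    # to reach TRAIN_ROIS_PER_IMAGE. Class IDs, deltas and masks stay zero for
+    # both, which marks negatives as background and lets the bbox/mask losses
+    # skip them (they only use entries with class_id > 0).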
+ rois = tf.concat([positive_rois, negative_rois], axis=0) + N = tf.shape(negative_rois)[0] + P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) + rois = tf.pad(rois, [(0, P), (0, 0)]) + roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) + roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) + deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) + masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)]) + + return rois, roi_gt_class_ids, deltas, masks + + +class DetectionTargetLayer(KE.Layer): + """Subsamples proposals and generates target box refinement, class_ids, + and masks for each. + Inputs: + proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might + be zero padded if there are not enough proposals. + gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs. + gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized + coordinates. + gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type + Returns: Target ROIs and corresponding class IDs, bounding box shifts, + and masks. + rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized + coordinates + target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs. + target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)] + target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width] + Masks cropped to bbox boundaries and resized to neural + network output size. + Note: Returned arrays might be zero padded if not enough target ROIs. + """ + + def __init__(self, config, **kwargs): + super(DetectionTargetLayer, self).__init__(**kwargs) + self.config = config + + def call(self, inputs): + proposals = inputs[0] + gt_class_ids = inputs[1] + gt_boxes = inputs[2] + gt_masks = inputs[3] + + # Slice the batch and run a graph for each slice + # TODO: Rename target_bbox to target_deltas for clarity + names = ["rois", "target_class_ids", "target_bbox", "target_mask"] + outputs = utils.batch_slice( + [proposals, gt_class_ids, gt_boxes, gt_masks], + lambda w, x, y, z: detection_targets_graph( + w, x, y, z, self.config), + self.config.IMAGES_PER_GPU, names=names) + return outputs + + def compute_output_shape(self, input_shape): + return [ + (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois + (None, self.config.TRAIN_ROIS_PER_IMAGE), # class_ids + (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas + (None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0], + self.config.MASK_SHAPE[1]) # masks + ] + + def compute_mask(self, inputs, mask=None): + return [None, None, None, None] + + +############################################################ +# Detection Layer +############################################################ + +def refine_detections_graph(rois, probs, deltas, window, config): + """Refine classified proposals and filter overlaps and return final + detections. + Inputs: + rois: [N, (y1, x1, y2, x2)] in normalized coordinates + probs: [N, num_classes]. Class probabilities. + deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific + bounding box deltas. + window: (y1, x1, y2, x2) in normalized coordinates. The part of the image + that contains the image excluding the padding. + Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where + coordinates are normalized. 
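+    Filtering order: drop background ROIs (class_id 0), drop scores below
+    DETECTION_MIN_CONFIDENCE, apply per-class NMS at DETECTION_NMS_THRESHOLD,
+    keep the top DETECTION_MAX_INSTANCES by score, then zero-pad to a fixed size.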
+ """ + # Class IDs per ROI + class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) + # Class probability of the top class of each ROI + indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) + class_scores = tf.gather_nd(probs, indices) + # Class-specific bounding box deltas + deltas_specific = tf.gather_nd(deltas, indices) + # Apply bounding box deltas + # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates + refined_rois = apply_box_deltas_graph( + rois, deltas_specific * config.BBOX_STD_DEV) + # Clip boxes to image window + refined_rois = clip_boxes_graph(refined_rois, window) + + # TODO: Filter out boxes with zero area + + # Filter out background boxes + keep = tf.where(class_ids > 0)[:, 0] + # Filter out low confidence boxes + if config.DETECTION_MIN_CONFIDENCE: + conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] + keep = tf.sets.intersection(tf.expand_dims(keep, 0), + tf.expand_dims(conf_keep, 0)) + keep = tf.sparse.to_dense(keep)[0] + + # Apply per-class NMS + # 1. Prepare variables + pre_nms_class_ids = tf.gather(class_ids, keep) + pre_nms_scores = tf.gather(class_scores, keep) + pre_nms_rois = tf.gather(refined_rois, keep) + unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] + + def nms_keep_map(class_id): + """Apply Non-Maximum Suppression on ROIs of the given class.""" + # Indices of ROIs of the given class + ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] + # Apply NMS + class_keep = tf.image.non_max_suppression( + tf.gather(pre_nms_rois, ixs), + tf.gather(pre_nms_scores, ixs), + max_output_size=config.DETECTION_MAX_INSTANCES, + iou_threshold=config.DETECTION_NMS_THRESHOLD) + # Map indices + class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) + # Pad with -1 so returned tensors have the same shape + gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] + class_keep = tf.pad(class_keep, [(0, gap)], + mode='CONSTANT', constant_values=-1) + # Set shape so map_fn() can infer result shape + class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) + return class_keep + + # 2. Map over class IDs + nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, + dtype=tf.int64) + # 3. Merge results into one list, and remove -1 padding + nms_keep = tf.reshape(nms_keep, [-1]) + nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) + # 4. Compute intersection between keep and nms_keep + keep = tf.sets.intersection(tf.expand_dims(keep, 0), + tf.expand_dims(nms_keep, 0)) + keep = tf.sparse.to_dense(keep)[0] + # Keep top detections + roi_count = config.DETECTION_MAX_INSTANCES + class_scores_keep = tf.gather(class_scores, keep) + num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) + top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] + keep = tf.gather(keep, top_ids) + + # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] + # Coordinates are normalized. + detections = tf.concat([ + tf.gather(refined_rois, keep), + tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], + tf.gather(class_scores, keep)[..., tf.newaxis] + ], axis=1) + + # Pad with zeros if detections < DETECTION_MAX_INSTANCES + gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] + detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT") + return detections + + +class DetectionLayer(KE.Layer): + """Takes classified proposal boxes and their bounding box deltas and + returns the final detection boxes. 
+ Returns: + [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where + coordinates are normalized. + """ + + def __init__(self, config=None, **kwargs): + super(DetectionLayer, self).__init__(**kwargs) + self.config = config + + def call(self, inputs): + rois = inputs[0] + mrcnn_class = inputs[1] + mrcnn_bbox = inputs[2] + image_meta = inputs[3] + + # Get windows of images in normalized coordinates. Windows are the area + # in the image that excludes the padding. + # Use the shape of the first image in the batch to normalize the window + # because we know that all images get resized to the same size. + m = parse_image_meta_graph(image_meta) + image_shape = m['image_shape'][0] + window = norm_boxes_graph(m['window'], image_shape[:2]) + + # Run detection refinement graph on each item in the batch + detections_batch = utils.batch_slice( + [rois, mrcnn_class, mrcnn_bbox, window], + lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config), + self.config.IMAGES_PER_GPU) + + # Reshape output + # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in + # normalized coordinates + return tf.reshape( + detections_batch, + [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6]) + + def compute_output_shape(self, input_shape): + return (None, self.config.DETECTION_MAX_INSTANCES, 6) + + +############################################################ +# Region Proposal Network (RPN) +############################################################ + +def rpn_graph(feature_map, anchors_per_location, anchor_stride): + """Builds the computation graph of Region Proposal Network. + feature_map: backbone features [batch, height, width, depth] + anchors_per_location: number of anchors per pixel in the feature map + anchor_stride: Controls the density of anchors. Typically 1 (anchors for + every pixel in the feature map), or 2 (every other pixel). + Returns: + rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) + rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. + rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be + applied to anchors. + """ + # TODO: check if stride of 2 causes alignment issues if the feature map + # is not even. + # Shared convolutional base of the RPN + shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu', + strides=anchor_stride, + name='rpn_conv_shared')(feature_map) + + # Anchor Score. [batch, height, width, anchors per location * 2]. + x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid', + activation='linear', name='rpn_class_raw')(shared) + + # Reshape to [batch, anchors, 2] + rpn_class_logits = KL.Lambda( + lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(x) + + # Softmax on last dimension of BG/FG. + rpn_probs = KL.Activation( + "softmax", name="rpn_class_xxx")(rpn_class_logits) + + # Bounding box refinement. [batch, H, W, anchors per location * depth] + # where depth is [x, y, log(w), log(h)] + x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid", + activation='linear', name='rpn_bbox_pred')(shared) + + # Reshape to [batch, anchors, 4] + rpn_bbox = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(x) + + return [rpn_class_logits, rpn_probs, rpn_bbox] + + +def build_rpn_model(anchor_stride, anchors_per_location, depth): + """Builds a Keras model of the Region Proposal Network. + It wraps the RPN graph so it can be used multiple times with shared + weights. 
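+    (The returned model is intended to be called once per FPN feature map, which
+    is how the RPN weights end up shared across pyramid levels.)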
+ anchors_per_location: number of anchors per pixel in the feature map + anchor_stride: Controls the density of anchors. Typically 1 (anchors for + every pixel in the feature map), or 2 (every other pixel). + depth: Depth of the backbone feature map. + Returns a Keras Model object. The model outputs, when called, are: + rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) + rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. + rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be + applied to anchors. + """ + input_feature_map = KL.Input(shape=[None, None, depth], + name="input_rpn_feature_map") + outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride) + return KM.Model([input_feature_map], outputs, name="rpn_model") + + +############################################################ +# Feature Pyramid Network Heads +############################################################ + +def fpn_classifier_graph(rois, feature_maps, image_meta, + pool_size, num_classes, train_bn=True, + fc_layers_size=1024): + """Builds the computation graph of the feature pyramid network classifier + and regressor heads. + rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized + coordinates. + feature_maps: List of feature maps from different layers of the pyramid, + [P2, P3, P4, P5]. Each has a different resolution. + image_meta: [batch, (meta data)] Image details. See compose_image_meta() + pool_size: The width of the square feature map generated from ROI Pooling. + num_classes: number of classes, which determines the depth of the results + train_bn: Boolean. Train or freeze Batch Norm layers + fc_layers_size: Size of the 2 FC layers + Returns: + logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax) + probs: [batch, num_rois, NUM_CLASSES] classifier probabilities + bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to + proposal boxes + """ + # ROI Pooling + # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] + x = PyramidROIAlign([pool_size, pool_size], + name="roi_align_classifier")([rois, image_meta] + feature_maps) + # Two 1024 FC layers (implemented with Conv2D for consistency) + x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"), + name="mrcnn_class_conv1")(x) + x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn1')(x, training=train_bn) + x = KL.Activation('relu')(x) + x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)), + name="mrcnn_class_conv2")(x) + x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn2')(x, training=train_bn) + x = KL.Activation('relu')(x) + + shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2), + name="pool_squeeze")(x) + + # Classifier head + mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes), + name='mrcnn_class_logits')(shared) + mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"), + name="mrcnn_class")(mrcnn_class_logits) + + # BBox head + # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))] + x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'), + name='mrcnn_bbox_fc')(shared) + # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] + s = K.int_shape(x) + mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x) + + return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox + + +def build_fpn_mask_graph(rois, feature_maps, image_meta, + pool_size, 
num_classes, train_bn=True): + """Builds the computation graph of the mask head of Feature Pyramid Network. + rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized + coordinates. + feature_maps: List of feature maps from different layers of the pyramid, + [P2, P3, P4, P5]. Each has a different resolution. + image_meta: [batch, (meta data)] Image details. See compose_image_meta() + pool_size: The width of the square feature map generated from ROI Pooling. + num_classes: number of classes, which determines the depth of the results + train_bn: Boolean. Train or freeze Batch Norm layers + Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES] + """ + # ROI Pooling + # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] + x = PyramidROIAlign([pool_size, pool_size], + name="roi_align_mask")([rois, image_meta] + feature_maps) + + # Conv layers + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv1")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn1')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv2")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn2')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv3")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn3')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), + name="mrcnn_mask_conv4")(x) + x = KL.TimeDistributed(BatchNorm(), + name='mrcnn_mask_bn4')(x, training=train_bn) + x = KL.Activation('relu')(x) + + x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"), + name="mrcnn_mask_deconv")(x) + x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"), + name="mrcnn_mask")(x) + return x + + +############################################################ +# Loss Functions +############################################################ + +def smooth_l1_loss(y_true, y_pred): + """Implements Smooth-L1 loss. + y_true and y_pred are typically: [N, 4], but could be any shape. + """ + diff = K.abs(y_true - y_pred) + less_than_one = K.cast(K.less(diff, 1.0), "float32") + loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) + return loss + + +def rpn_class_loss_graph(rpn_match, rpn_class_logits): + """RPN anchor classifier loss. + rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, + -1=negative, 0=neutral anchor. + rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG. + """ + # Squeeze last dim to simplify + rpn_match = tf.squeeze(rpn_match, -1) + # Get anchor classes. Convert the -1/+1 match to 0/1 values. + anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) + # Positive and Negative anchors contribute to the loss, + # but neutral anchors (match value = 0) don't. + indices = tf.where(K.not_equal(rpn_match, 0)) + # Pick rows that contribute to the loss and filter out the rest. 
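+    # indices has shape [num_non_neutral, 2] over (batch, anchor), so the two
+    # gather_nd calls below keep only anchors with a +1/-1 match; the loss is then
+    # averaged over that subset, or set to 0 when the subset is empty.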
+ rpn_class_logits = tf.gather_nd(rpn_class_logits, indices) + anchor_class = tf.gather_nd(anchor_class, indices) + # Cross entropy loss + loss = K.sparse_categorical_crossentropy(target=anchor_class, + output=rpn_class_logits, + from_logits=True) + loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) + return loss + + +def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox): + """Return the RPN bounding box loss graph. + config: the model config object. + target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))]. + Uses 0 padding to fill in unsed bbox deltas. + rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, + -1=negative, 0=neutral anchor. + rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] + """ + # Positive anchors contribute to the loss, but negative and + # neutral anchors (match value of 0 or -1) don't. + rpn_match = K.squeeze(rpn_match, -1) + indices = tf.where(K.equal(rpn_match, 1)) + + # Pick bbox deltas that contribute to the loss + rpn_bbox = tf.gather_nd(rpn_bbox, indices) + + # Trim target bounding box deltas to the same length as rpn_bbox. + batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1) + target_bbox = batch_pack_graph(target_bbox, batch_counts, + config.IMAGES_PER_GPU) + + loss = smooth_l1_loss(target_bbox, rpn_bbox) + + loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) + return loss + + +def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, + active_class_ids): + """Loss for the classifier head of Mask RCNN. + target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero + padding to fill in the array. + pred_class_logits: [batch, num_rois, num_classes] + active_class_ids: [batch, num_classes]. Has a value of 1 for + classes that are in the dataset of the image, and 0 + for classes that are not in the dataset. + """ + # During model building, Keras calls this function with + # target_class_ids of type float32. Unclear why. Cast it + # to int to get around it. + target_class_ids = tf.cast(target_class_ids, 'int64') + + # Find predictions of classes that are not in the dataset. + pred_class_ids = tf.argmax(pred_class_logits, axis=2) + # TODO: Update this line to work with batch > 1. Right now it assumes all + # images in a batch have the same active_class_ids + pred_active = tf.gather(active_class_ids[0], pred_class_ids) + + # Loss + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=target_class_ids, logits=pred_class_logits) + + # Erase losses of predictions of classes that are not in the active + # classes of the image. + loss = loss * pred_active + + # Computer loss mean. Use only predictions that contribute + # to the loss to get a correct mean. + loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active) + return loss + + +def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): + """Loss for Mask R-CNN bounding box refinement. + target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))] + target_class_ids: [batch, num_rois]. Integer class IDs. + pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))] + """ + # Reshape to merge batch and roi dimensions for simplicity. + target_class_ids = K.reshape(target_class_ids, (-1,)) + target_bbox = K.reshape(target_bbox, (-1, 4)) + pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) + + # Only positive ROIs contribute to the loss. And only + # the right class_id of each ROI. Get their indices. 
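+    # Illustrative note (hypothetical values, not part of the original code): if the
+    # flattened target_class_ids is [0, 3, 7], then positive_roi_ix = [1, 2],
+    # positive_roi_class_ids = [3, 7] and indices = [[1, 3], [2, 7]], i.e. (roi, class)
+    # pairs. tf.gather_nd(pred_bbox, indices) then selects, for each positive ROI, only
+    # the 4 deltas predicted for its ground-truth class out of its [num_classes, 4] block.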
+ positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] + positive_roi_class_ids = tf.cast( + tf.gather(target_class_ids, positive_roi_ix), tf.int64) + indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) + + # Gather the deltas (predicted and true) that contribute to loss + target_bbox = tf.gather(target_bbox, positive_roi_ix) + pred_bbox = tf.gather_nd(pred_bbox, indices) + + # Smooth-L1 Loss + loss = K.switch(tf.size(target_bbox) > 0, + smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), + tf.constant(0.0)) + loss = K.mean(loss) + return loss + + +def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks): + """Mask binary cross-entropy loss for the masks head. + target_masks: [batch, num_rois, height, width]. + A float32 tensor of values 0 or 1. Uses zero padding to fill array. + target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded. + pred_masks: [batch, proposals, height, width, num_classes] float32 tensor + with values from 0 to 1. + """ + # Reshape for simplicity. Merge first two dimensions into one. + target_class_ids = K.reshape(target_class_ids, (-1,)) + mask_shape = tf.shape(target_masks) + target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3])) + pred_shape = tf.shape(pred_masks) + pred_masks = K.reshape(pred_masks, + (-1, pred_shape[2], pred_shape[3], pred_shape[4])) + # Permute predicted masks to [N, num_classes, height, width] + pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2]) + + # Only positive ROIs contribute to the loss. And only + # the class specific mask of each ROI. + positive_ix = tf.where(target_class_ids > 0)[:, 0] + positive_class_ids = tf.cast( + tf.gather(target_class_ids, positive_ix), tf.int64) + indices = tf.stack([positive_ix, positive_class_ids], axis=1) + + # Gather the masks (predicted and true) that contribute to loss + y_true = tf.gather(target_masks, positive_ix) + y_pred = tf.gather_nd(pred_masks, indices) + + # Compute binary cross entropy. If no positive ROIs, then return 0. + # shape: [batch, roi, num_classes] + loss = K.switch(tf.size(y_true) > 0, + K.binary_crossentropy(target=y_true, output=y_pred), + tf.constant(0.0)) + loss = K.mean(loss) + return loss + + +############################################################ +# Data Generator +############################################################ + +def load_image_gt(dataset, config, image_id, augment=False, augmentation=None, + use_mini_mask=False): + """Load and return ground truth data for an image (image, mask, bounding boxes). + augment: (deprecated. Use augmentation instead). If true, apply random + image augmentation. Currently, only horizontal flipping is offered. + augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. + For example, passing imgaug.augmenters.Fliplr(0.5) flips images + right/left 50% of the time. + use_mini_mask: If False, returns full-size masks that are the same height + and width as the original image. These can be big, for example + 1024x1024x100 (for 100 instances). Mini masks are smaller, typically, + 224x224 and are generated by extracting the bounding box of the + object and resizing it to MINI_MASK_SHAPE. + Returns: + image: [height, width, 3] + shape: the original shape of the image before resizing and cropping. + class_ids: [instance_count] Integer class IDs + bbox: [instance_count, (y1, x1, y2, x2)] + mask: [height, width, instance_count]. 
The height and width are those + of the image unless use_mini_mask is True, in which case they are + defined in MINI_MASK_SHAPE. + """ + # Load image and mask + image = dataset.load_image(image_id) + mask, class_ids = dataset.load_mask(image_id) + original_shape = image.shape + image, window, scale, padding, crop = utils.resize_image( + image, + min_dim=config.IMAGE_MIN_DIM, + min_scale=config.IMAGE_MIN_SCALE, + max_dim=config.IMAGE_MAX_DIM, + mode=config.IMAGE_RESIZE_MODE) + mask = utils.resize_mask(mask, scale, padding, crop) + + # Random horizontal flips. + # TODO: will be removed in a future update in favor of augmentation + if augment: + logging.warning("'augment' is deprecated. Use 'augmentation' instead.") + if random.randint(0, 1): + image = np.fliplr(image) + mask = np.fliplr(mask) + + # Augmentation + # This requires the imgaug lib (https://github.com/aleju/imgaug) + if augmentation: + import imgaug + + # Augmenters that are safe to apply to masks + # Some, such as Affine, have settings that make them unsafe, so always + # test your augmentation on masks + MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes", + "Fliplr", "Flipud", "CropAndPad", + "Affine", "PiecewiseAffine"] + + def hook(images, augmenter, parents, default): + """Determines which augmenters to apply to masks.""" + return augmenter.__class__.__name__ in MASK_AUGMENTERS + + # Store shapes before augmentation to compare + image_shape = image.shape + mask_shape = mask.shape + # Make augmenters deterministic to apply similarly to images and masks + det = augmentation.to_deterministic() + image = det.augment_image(image) + # Change mask to np.uint8 because imgaug doesn't support np.bool + mask = det.augment_image(mask.astype(np.uint8), + hooks=imgaug.HooksImages(activator=hook)) + # Verify that shapes didn't change + assert image.shape == image_shape, "Augmentation shouldn't change image size" + assert mask.shape == mask_shape, "Augmentation shouldn't change mask size" + # Change mask back to bool + mask = mask.astype(np.bool) + + # Note that some boxes might be all zeros if the corresponding mask got cropped out. + # and here is to filter them out + _idx = np.sum(mask, axis=(0, 1)) > 0 + mask = mask[:, :, _idx] + class_ids = class_ids[_idx] + # Bounding boxes. Note that some boxes might be all zeros + # if the corresponding mask got cropped out. + # bbox: [num_instances, (y1, x1, y2, x2)] + bbox = utils.extract_bboxes(mask) + + # Active classes + # Different datasets have different classes, so track the + # classes supported in the dataset of this image. + active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32) + source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] + active_class_ids[source_class_ids] = 1 + + # Resize masks to smaller size to reduce memory usage + if use_mini_mask: + mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) + + # Image meta data + image_meta = compose_image_meta(image_id, original_shape, image.shape, + window, scale, active_class_ids) + + return image, image_meta, class_ids, bbox, mask + + +def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks, config): + """Generate targets for training Stage 2 classifier and mask heads. + This is not used in normal training. It's useful for debugging or to train + the Mask RCNN heads without using the RPN head. + Inputs: + rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes. 
+ gt_class_ids: [instance count] Integer class IDs + gt_boxes: [instance count, (y1, x1, y2, x2)] + gt_masks: [height, width, instance count] Ground truth masks. Can be full + size or mini-masks. + Returns: + rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] + class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. + bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific + bbox refinements. + masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific masks cropped + to bbox boundaries and resized to neural network output size. + """ + assert rpn_rois.shape[0] > 0 + assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format( + gt_class_ids.dtype) + assert gt_boxes.dtype == np.int32, "Expected int but got {}".format( + gt_boxes.dtype) + assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format( + gt_masks.dtype) + + # It's common to add GT Boxes to ROIs but we don't do that here because + # according to XinLei Chen's paper, it doesn't help. + + # Trim empty padding in gt_boxes and gt_masks parts + instance_ids = np.where(gt_class_ids > 0)[0] + assert instance_ids.shape[0] > 0, "Image must contain instances." + gt_class_ids = gt_class_ids[instance_ids] + gt_boxes = gt_boxes[instance_ids] + gt_masks = gt_masks[:, :, instance_ids] + + # Compute areas of ROIs and ground truth boxes. + rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * \ + (rpn_rois[:, 3] - rpn_rois[:, 1]) + gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * \ + (gt_boxes[:, 3] - gt_boxes[:, 1]) + + # Compute overlaps [rpn_rois, gt_boxes] + overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0])) + for i in range(overlaps.shape[1]): + gt = gt_boxes[i] + overlaps[:, i] = utils.compute_iou( + gt, rpn_rois, gt_box_area[i], rpn_roi_area) + + # Assign ROIs to GT boxes + rpn_roi_iou_argmax = np.argmax(overlaps, axis=1) + rpn_roi_iou_max = overlaps[np.arange( + overlaps.shape[0]), rpn_roi_iou_argmax] + # GT box assigned to each ROI + rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax] + rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax] + + # Positive ROIs are those with >= 0.5 IoU with a GT box. + fg_ids = np.where(rpn_roi_iou_max > 0.5)[0] + + # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining) + # TODO: To hard example mine or not to hard example mine, that's the question + # bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0] + bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] + + # Subsample ROIs. Aim for 33% foreground. + # FG + fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) + if fg_ids.shape[0] > fg_roi_count: + keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False) + else: + keep_fg_ids = fg_ids + # BG + remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0] + if bg_ids.shape[0] > remaining: + keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) + else: + keep_bg_ids = bg_ids + # Combine indices of ROIs to keep + keep = np.concatenate([keep_fg_ids, keep_bg_ids]) + # Need more? + remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0] + if remaining > 0: + # Looks like we don't have enough samples to maintain the desired + # balance. Reduce requirements and fill in the rest. This is + # likely different from the Mask RCNN paper. + + # There is a small chance we have neither fg nor bg samples. 
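+        # Illustrative note (hypothetical numbers, not part of the original code): with
+        # TRAIN_ROIS_PER_IMAGE=200 and ROI_POSITIVE_RATIO=0.33, fg_roi_count above is 66.
+        # If only 10 proposals reached IoU >= 0.5, keep_fg_ids has 10 entries, up to 190
+        # background ROIs are sampled, and whatever is still missing is what this branch
+        # fills in below.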
+ if keep.shape[0] == 0: + # Pick bg regions with easier IoU threshold + bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] + assert bg_ids.shape[0] >= remaining + keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) + assert keep_bg_ids.shape[0] == remaining + keep = np.concatenate([keep, keep_bg_ids]) + else: + # Fill the rest with repeated bg rois. + keep_extra_ids = np.random.choice( + keep_bg_ids, remaining, replace=True) + keep = np.concatenate([keep, keep_extra_ids]) + assert keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE, \ + "keep doesn't match ROI batch size {}, {}".format( + keep.shape[0], config.TRAIN_ROIS_PER_IMAGE) + + # Reset the gt boxes assigned to BG ROIs. + rpn_roi_gt_boxes[keep_bg_ids, :] = 0 + rpn_roi_gt_class_ids[keep_bg_ids] = 0 + + # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement. + rois = rpn_rois[keep] + roi_gt_boxes = rpn_roi_gt_boxes[keep] + roi_gt_class_ids = rpn_roi_gt_class_ids[keep] + roi_gt_assignment = rpn_roi_iou_argmax[keep] + + # Class-aware bbox deltas. [y, x, log(h), log(w)] + bboxes = np.zeros((config.TRAIN_ROIS_PER_IMAGE, + config.NUM_CLASSES, 4), dtype=np.float32) + pos_ids = np.where(roi_gt_class_ids > 0)[0] + bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement( + rois[pos_ids], roi_gt_boxes[pos_ids, :4]) + # Normalize bbox refinements + bboxes /= config.BBOX_STD_DEV + + # Generate class-specific target masks + masks = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0], config.MASK_SHAPE[1], config.NUM_CLASSES), + dtype=np.float32) + for i in pos_ids: + class_id = roi_gt_class_ids[i] + assert class_id > 0, "class id must be greater than 0" + gt_id = roi_gt_assignment[i] + class_mask = gt_masks[:, :, gt_id] + + if config.USE_MINI_MASK: + # Create a mask placeholder, the size of the image + placeholder = np.zeros(config.IMAGE_SHAPE[:2], dtype=bool) + # GT box + gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id] + gt_w = gt_x2 - gt_x1 + gt_h = gt_y2 - gt_y1 + # Resize mini mask to size of GT box + placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \ + np.round(utils.resize(class_mask, (gt_h, gt_w))).astype(bool) + # Place the mini batch in the placeholder + class_mask = placeholder + + # Pick part of the mask and resize it + y1, x1, y2, x2 = rois[i].astype(np.int32) + m = class_mask[y1:y2, x1:x2] + mask = utils.resize(m, config.MASK_SHAPE) + masks[i, :, :, class_id] = mask + + return rois, roi_gt_class_ids, bboxes, masks + + +def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config): + """Given the anchors and GT boxes, compute overlaps and identify positive + anchors and deltas to refine them to match their corresponding GT boxes. + anchors: [num_anchors, (y1, x1, y2, x2)] + gt_class_ids: [num_gt_boxes] Integer class IDs. + gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)] + Returns: + rpn_match: [N] (int32) matches between anchors and GT boxes. + 1 = positive anchor, -1 = negative anchor, 0 = neutral + rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. + """ + # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral + rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32) + # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))] + rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4)) + + # Handle COCO crowds + # A crowd box in COCO is a bounding box around several instances. Exclude + # them from training. A crowd box is given a negative class ID. 
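+    # Illustrative note (hypothetical values, not part of the original code): with
+    # gt_class_ids = [12, -1, 5], the second box is a crowd annotation. It is dropped
+    # from the match targets below, and any anchor whose IoU with it is >= 0.001 is
+    # excluded from the negative-anchor pool, so the crowd region is never trained as
+    # background.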
+ crowd_ix = np.where(gt_class_ids < 0)[0] + if crowd_ix.shape[0] > 0: + # Filter out crowds from ground truth class IDs and boxes + non_crowd_ix = np.where(gt_class_ids > 0)[0] + crowd_boxes = gt_boxes[crowd_ix] + gt_class_ids = gt_class_ids[non_crowd_ix] + gt_boxes = gt_boxes[non_crowd_ix] + # Compute overlaps with crowd boxes [anchors, crowds] + crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes) + crowd_iou_max = np.amax(crowd_overlaps, axis=1) + no_crowd_bool = (crowd_iou_max < 0.001) + else: + # All anchors don't intersect a crowd + no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool) + + # Compute overlaps [num_anchors, num_gt_boxes] + overlaps = utils.compute_overlaps(anchors, gt_boxes) + + # Match anchors to GT Boxes + # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive. + # If an anchor overlaps a GT box with IoU < 0.3 then it's negative. + # Neutral anchors are those that don't match the conditions above, + # and they don't influence the loss function. + # However, don't keep any GT box unmatched (rare, but happens). Instead, + # match it to the closest anchor (even if its max IoU is < 0.3). + # + # 1. Set negative anchors first. They get overwritten below if a GT box is + # matched to them. Skip boxes in crowd areas. + anchor_iou_argmax = np.argmax(overlaps, axis=1) + anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax] + rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 + # 2. Set an anchor for each GT box (regardless of IoU value). + # If multiple anchors have the same IoU match all of them + gt_iou_argmax = np.argwhere(overlaps == np.max(overlaps, axis=0))[:,0] + rpn_match[gt_iou_argmax] = 1 + # 3. Set anchors with high overlap as positive. + rpn_match[anchor_iou_max >= 0.7] = 1 + + # Subsample to balance positive and negative anchors + # Don't let positives be more than half the anchors + ids = np.where(rpn_match == 1)[0] + extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) + if extra > 0: + # Reset the extra ones to neutral + ids = np.random.choice(ids, extra, replace=False) + rpn_match[ids] = 0 + # Same for negative proposals + ids = np.where(rpn_match == -1)[0] + extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE - + np.sum(rpn_match == 1)) + if extra > 0: + # Rest the extra ones to neutral + ids = np.random.choice(ids, extra, replace=False) + rpn_match[ids] = 0 + + # For positive anchors, compute shift and scale needed to transform them + # to match the corresponding GT boxes. + ids = np.where(rpn_match == 1)[0] + ix = 0 # index into rpn_bbox + # TODO: use box_refinement() rather than duplicating the code here + for i, a in zip(ids, anchors[ids]): + # Closest gt box (it might have IoU < 0.7) + gt = gt_boxes[anchor_iou_argmax[i]] + + # Convert coordinates to center plus width/height. + # GT Box + gt_h = gt[2] - gt[0] + gt_w = gt[3] - gt[1] + gt_center_y = gt[0] + 0.5 * gt_h + gt_center_x = gt[1] + 0.5 * gt_w + # Anchor + a_h = a[2] - a[0] + a_w = a[3] - a[1] + a_center_y = a[0] + 0.5 * a_h + a_center_x = a[1] + 0.5 * a_w + + # Compute the bbox refinement that the RPN should predict. + rpn_bbox[ix] = [ + (gt_center_y - a_center_y) / a_h, + (gt_center_x - a_center_x) / a_w, + np.log(gt_h / a_h), + np.log(gt_w / a_w), + ] + # Normalize + rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV + ix += 1 + + return rpn_match, rpn_bbox + + +def generate_random_rois(image_shape, count, gt_class_ids, gt_boxes): + """Generates ROI proposals similar to what a region proposal network + would generate. 
+ image_shape: [Height, Width, Depth] + count: Number of ROIs to generate + gt_class_ids: [N] Integer ground truth class IDs + gt_boxes: [N, (y1, x1, y2, x2)] Ground truth boxes in pixels. + Returns: [count, (y1, x1, y2, x2)] ROI boxes in pixels. + """ + # placeholder + rois = np.zeros((count, 4), dtype=np.int32) + + # Generate random ROIs around GT boxes (90% of count) + rois_per_box = int(0.9 * count / gt_boxes.shape[0]) + for i in range(gt_boxes.shape[0]): + gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[i] + h = gt_y2 - gt_y1 + w = gt_x2 - gt_x1 + # random boundaries + r_y1 = max(gt_y1 - h, 0) + r_y2 = min(gt_y2 + h, image_shape[0]) + r_x1 = max(gt_x1 - w, 0) + r_x2 = min(gt_x2 + w, image_shape[1]) + + # To avoid generating boxes with zero area, we generate double what + # we need and filter out the extra. If we get fewer valid boxes + # than we need, we loop and try again. + while True: + y1y2 = np.random.randint(r_y1, r_y2, (rois_per_box * 2, 2)) + x1x2 = np.random.randint(r_x1, r_x2, (rois_per_box * 2, 2)) + # Filter out zero area boxes + threshold = 1 + y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= + threshold][:rois_per_box] + x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= + threshold][:rois_per_box] + if y1y2.shape[0] == rois_per_box and x1x2.shape[0] == rois_per_box: + break + + # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape + # into x1, y1, x2, y2 order + x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) + y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) + box_rois = np.hstack([y1, x1, y2, x2]) + rois[rois_per_box * i:rois_per_box * (i + 1)] = box_rois + + # Generate random ROIs anywhere in the image (10% of count) + remaining_count = count - (rois_per_box * gt_boxes.shape[0]) + # To avoid generating boxes with zero area, we generate double what + # we need and filter out the extra. If we get fewer valid boxes + # than we need, we loop and try again. + while True: + y1y2 = np.random.randint(0, image_shape[0], (remaining_count * 2, 2)) + x1x2 = np.random.randint(0, image_shape[1], (remaining_count * 2, 2)) + # Filter out zero area boxes + threshold = 1 + y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= + threshold][:remaining_count] + x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= + threshold][:remaining_count] + if y1y2.shape[0] == remaining_count and x1x2.shape[0] == remaining_count: + break + + # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape + # into x1, y1, x2, y2 order + x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) + y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) + global_rois = np.hstack([y1, x1, y2, x2]) + rois[-remaining_count:] = global_rois + return rois + + +def data_generator(dataset, config, shuffle=True, augment=False, augmentation=None, + random_rois=0, batch_size=1, detection_targets=False, + no_augmentation_sources=None): + """A generator that returns images and corresponding target class ids, + bounding box deltas, and masks. + dataset: The Dataset object to pick data from + config: The model config object + shuffle: If True, shuffles the samples before every epoch + augment: (deprecated. Use augmentation instead). If true, apply random + image augmentation. Currently, only horizontal flipping is offered. + augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. + For example, passing imgaug.augmenters.Fliplr(0.5) flips images + right/left 50% of the time. + random_rois: If > 0 then generate proposals to be used to train the + network classifier and mask heads. 
Useful if training + the Mask RCNN part without the RPN. + batch_size: How many images to return in each call + detection_targets: If True, generate detection targets (class IDs, bbox + deltas, and masks). Typically for debugging or visualizations because + in trainig detection targets are generated by DetectionTargetLayer. + no_augmentation_sources: Optional. List of sources to exclude for + augmentation. A source is string that identifies a dataset and is + defined in the Dataset class. + Returns a Python generator. Upon calling next() on it, the + generator returns two lists, inputs and outputs. The contents + of the lists differs depending on the received arguments: + inputs list: + - images: [batch, H, W, C] + - image_meta: [batch, (meta data)] Image details. See compose_image_meta() + - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral) + - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. + - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs + - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] + - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width + are those of the image unless use_mini_mask is True, in which + case they are defined in MINI_MASK_SHAPE. + outputs list: Usually empty in regular training. But if detection_targets + is True then the outputs list contains target class_ids, bbox deltas, + and masks. + """ + b = 0 # batch item index + image_index = -1 + image_ids = np.copy(dataset.image_ids) + error_count = 0 + no_augmentation_sources = no_augmentation_sources or [] + + # Anchors + # [anchor_count, (y1, x1, y2, x2)] + backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE) + anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, + config.RPN_ANCHOR_RATIOS, + backbone_shapes, + config.BACKBONE_STRIDES, + config.RPN_ANCHOR_STRIDE) + + # Keras requires a generator to run indefinitely. + while True: + try: + # Increment index to pick next image. Shuffle if at the start of an epoch. + image_index = (image_index + 1) % len(image_ids) + if shuffle and image_index == 0: + np.random.shuffle(image_ids) + + # Get GT bounding boxes and masks for image. + image_id = image_ids[image_index] + + # If the image source is not to be augmented pass None as augmentation + if dataset.image_info[image_id]['source'] in no_augmentation_sources: + image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ + load_image_gt(dataset, config, image_id, augment=augment, + augmentation=None, + use_mini_mask=config.USE_MINI_MASK) + else: + image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ + load_image_gt(dataset, config, image_id, augment=augment, + augmentation=augmentation, + use_mini_mask=config.USE_MINI_MASK) + + # Skip images that have no instances. This can happen in cases + # where we train on a subset of classes and the image doesn't + # have any of the classes we care about. 
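+            # Illustrative note (hypothetical config values, not part of the original
+            # code): with IMAGE_SHAPE=[1024, 1024, 3], BACKBONE_STRIDES=[4, 8, 16, 32, 64],
+            # 3 anchor ratios and RPN_ANCHOR_STRIDE=1, the anchor pyramid built above has
+            # 256*256*3 + 128*128*3 + 64*64*3 + 32*32*3 + 16*16*3 = 261888 anchors, which
+            # is also the length of the rpn_match target computed below.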
+ if not np.any(gt_class_ids > 0): + continue + + # RPN Targets + rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors, + gt_class_ids, gt_boxes, config) + + # Mask R-CNN Targets + if random_rois: + rpn_rois = generate_random_rois( + image.shape, random_rois, gt_class_ids, gt_boxes) + if detection_targets: + rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\ + build_detection_targets( + rpn_rois, gt_class_ids, gt_boxes, gt_masks, config) + + # Init batch arrays + if b == 0: + batch_image_meta = np.zeros( + (batch_size,) + image_meta.shape, dtype=image_meta.dtype) + batch_rpn_match = np.zeros( + [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype) + batch_rpn_bbox = np.zeros( + [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype) + batch_images = np.zeros( + (batch_size,) + image.shape, dtype=np.float32) + batch_gt_class_ids = np.zeros( + (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32) + batch_gt_boxes = np.zeros( + (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32) + batch_gt_masks = np.zeros( + (batch_size, gt_masks.shape[0], gt_masks.shape[1], + config.MAX_GT_INSTANCES), dtype=gt_masks.dtype) + if random_rois: + batch_rpn_rois = np.zeros( + (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype) + if detection_targets: + batch_rois = np.zeros( + (batch_size,) + rois.shape, dtype=rois.dtype) + batch_mrcnn_class_ids = np.zeros( + (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype) + batch_mrcnn_bbox = np.zeros( + (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype) + batch_mrcnn_mask = np.zeros( + (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype) + + # If more instances than fits in the array, sub-sample from them. + if gt_boxes.shape[0] > config.MAX_GT_INSTANCES: + ids = np.random.choice( + np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False) + gt_class_ids = gt_class_ids[ids] + gt_boxes = gt_boxes[ids] + gt_masks = gt_masks[:, :, ids] + + # Add to batch + batch_image_meta[b] = image_meta + batch_rpn_match[b] = rpn_match[:, np.newaxis] + batch_rpn_bbox[b] = rpn_bbox + batch_images[b] = mold_image(image.astype(np.float32), config) + batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids + batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes + batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks + if random_rois: + batch_rpn_rois[b] = rpn_rois + if detection_targets: + batch_rois[b] = rois + batch_mrcnn_class_ids[b] = mrcnn_class_ids + batch_mrcnn_bbox[b] = mrcnn_bbox + batch_mrcnn_mask[b] = mrcnn_mask + b += 1 + + # Batch full? 
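+            # Illustrative note (hypothetical config values, not part of the original
+            # code): with BATCH_SIZE=1, IMAGE_SHAPE=[1024, 1024, 3], MAX_GT_INSTANCES=100,
+            # RPN_TRAIN_ANCHORS_PER_IMAGE=256 and mini masks of (56, 56), the arrays
+            # yielded below have roughly these shapes:
+            #   batch_images    (1, 1024, 1024, 3)
+            #   batch_rpn_match (1, 261888, 1)   # one entry per anchor
+            #   batch_rpn_bbox  (1, 256, 4)
+            #   batch_gt_boxes  (1, 100, 4)
+            #   batch_gt_masks  (1, 56, 56, 100)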
+ if b >= batch_size: + inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, + batch_gt_class_ids, batch_gt_boxes, batch_gt_masks] + outputs = [] + + if random_rois: + inputs.extend([batch_rpn_rois]) + if detection_targets: + inputs.extend([batch_rois]) + # Keras requires that output and targets have the same number of dimensions + batch_mrcnn_class_ids = np.expand_dims( + batch_mrcnn_class_ids, -1) + outputs.extend( + [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask]) + + yield inputs, outputs + + # start a new batch + b = 0 + except (GeneratorExit, KeyboardInterrupt): + raise + except: + # Log it and skip the image + logging.exception("Error processing image {}".format( + dataset.image_info[image_id])) + error_count += 1 + if error_count > 5: + raise + + +############################################################ +# MaskRCNN Class +############################################################ + +class MaskRCNN(): + """Encapsulates the Mask RCNN model functionality. + The actual Keras model is in the keras_model property. + """ + + def __init__(self, mode, config, model_dir): + """ + mode: Either "training" or "inference" + config: A Sub-class of the Config class + model_dir: Directory to save training logs and trained weights + """ + assert mode in ['training', 'inference'] + self.mode = mode + self.config = config + self.model_dir = model_dir + self.set_log_dir() + self.keras_model = self.build(mode=mode, config=config) + + def build(self, mode, config): + """Build Mask R-CNN architecture. + input_shape: The shape of the input image. + mode: Either "training" or "inference". The inputs and + outputs of the model differ accordingly. + """ + assert mode in ['training', 'inference'] + + # Image size must be dividable by 2 multiple times + h, w = config.IMAGE_SHAPE[:2] + if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): + raise Exception("Image size must be dividable by 2 at least 6 times " + "to avoid fractions when downscaling and upscaling." + "For example, use 256, 320, 384, 448, 512, ... etc. ") + + # Inputs + input_image = KL.Input( + shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image") + input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE], + name="input_image_meta") + if mode == "training": + # RPN GT + input_rpn_match = KL.Input( + shape=[None, 1], name="input_rpn_match", dtype=tf.int32) + input_rpn_bbox = KL.Input( + shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32) + + # Detection GT (class IDs, bounding boxes, and masks) + # 1. GT Class IDs (zero padded) + input_gt_class_ids = KL.Input( + shape=[None], name="input_gt_class_ids", dtype=tf.int32) + # 2. GT Boxes in pixels (zero padded) + # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates + input_gt_boxes = KL.Input( + shape=[None, 4], name="input_gt_boxes", dtype=tf.float32) + # Normalize coordinates + gt_boxes = KL.Lambda(lambda x: norm_boxes_graph( + x, K.shape(input_image)[1:3]))(input_gt_boxes) + # 3. GT Masks (zero padded) + # [batch, height, width, MAX_GT_INSTANCES] + if config.USE_MINI_MASK: + input_gt_masks = KL.Input( + shape=[config.MINI_MASK_SHAPE[0], + config.MINI_MASK_SHAPE[1], None], + name="input_gt_masks", dtype=bool) + else: + input_gt_masks = KL.Input( + shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None], + name="input_gt_masks", dtype=bool) + elif mode == "inference": + # Anchors in normalized coordinates + input_anchors = KL.Input(shape=[None, 4], name="input_anchors") + + # Build the shared convolutional layers. 
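+        # Illustrative note (assuming a resnet101 backbone and a 1024x1024 input; actual
+        # values depend on the config): the stage outputs C2..C5 gathered below have
+        # spatial sizes 256, 128, 64 and 32. The top-down path merges them into P2..P5 of
+        # the same sizes, plus P6 at 16x16, all with TOP_DOWN_PYRAMID_SIZE channels.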
+ # Bottom-up Layers + # Returns a list of the last layers of each stage, 5 in total. + # Don't create the thead (stage 5), so we pick the 4th item in the list. + if callable(config.BACKBONE): + _, C2, C3, C4, C5 = config.BACKBONE(input_image, stage5=True, + train_bn=config.TRAIN_BN) + else: + _, C2, C3, C4, C5 = resnet_graph(input_image, config.BACKBONE, + stage5=True, train_bn=config.TRAIN_BN) + # Top-down Layers + # TODO: add assert to varify feature map sizes match what's in config + P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5) + P4 = KL.Add(name="fpn_p4add")([ + KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), + KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)]) + P3 = KL.Add(name="fpn_p3add")([ + KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), + KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)]) + P2 = KL.Add(name="fpn_p2add")([ + KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), + KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)]) + # Attach 3x3 conv to all P layers to get the final feature maps. + P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2")(P2) + P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3")(P3) + P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4")(P4) + P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5")(P5) + # P6 is used for the 5th anchor scale in RPN. Generated by + # subsampling from P5 with stride of 2. + P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) + + # Note that P6 is used in RPN, but not in the classifier heads. + rpn_feature_maps = [P2, P3, P4, P5, P6] + mrcnn_feature_maps = [P2, P3, P4, P5] + + # Anchors + if mode == "training": + anchors = self.get_anchors(config.IMAGE_SHAPE) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape) + # A hack to get around Keras's bad support for constants + anchors = KL.Lambda(lambda x: tf.Variable(anchors), name="anchors")(input_image) + else: + anchors = input_anchors + + # RPN Model + rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE, + len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE) + # Loop through pyramid layers + layer_outputs = [] # list of lists + for p in rpn_feature_maps: + layer_outputs.append(rpn([p])) + # Concatenate layer outputs + # Convert from list of lists of level outputs to list of lists + # of outputs across levels. + # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] + output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] + outputs = list(zip(*layer_outputs)) + outputs = [KL.Concatenate(axis=1, name=n)(list(o)) + for o, n in zip(outputs, output_names)] + + rpn_class_logits, rpn_class, rpn_bbox = outputs + + # Generate proposals + # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates + # and zero padded. + proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\ + else config.POST_NMS_ROIS_INFERENCE + rpn_rois = ProposalLayer( + proposal_count=proposal_count, + nms_threshold=config.RPN_NMS_THRESHOLD, + name="ROI", + config=config)([rpn_class, rpn_bbox, anchors]) + + if mode == "training": + # Class ID mask to mark class IDs supported by the dataset the image + # came from. 
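+            # Illustrative note (hypothetical example, not part of the original code):
+            # for an image from a source dataset that only labels classes {0, 1, 5},
+            # active_class_ids looks like [1, 1, 0, 0, 0, 1, 0, ...], and the classifier
+            # loss later zeroes out predictions of the inactive classes for that image.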
+ active_class_ids = KL.Lambda( + lambda x: parse_image_meta_graph(x)["active_class_ids"] + )(input_image_meta) + + if not config.USE_RPN_ROIS: + # Ignore predicted ROIs and use ROIs provided as an input. + input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4], + name="input_roi", dtype=np.int32) + # Normalize coordinates + target_rois = KL.Lambda(lambda x: norm_boxes_graph( + x, K.shape(input_image)[1:3]))(input_rois) + else: + target_rois = rpn_rois + + # Generate detection targets + # Subsamples proposals and generates target outputs for training + # Note that proposal class IDs, gt_boxes, and gt_masks are zero + # padded. Equally, returned rois and targets are zero padded. + rois, target_class_ids, target_bbox, target_mask =\ + DetectionTargetLayer(config, name="proposal_targets")([ + target_rois, input_gt_class_ids, gt_boxes, input_gt_masks]) + + # Network Heads + # TODO: verify that this handles zero padded ROIs + mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ + fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta, + config.POOL_SIZE, config.NUM_CLASSES, + train_bn=config.TRAIN_BN, + fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) + + mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps, + input_image_meta, + config.MASK_POOL_SIZE, + config.NUM_CLASSES, + train_bn=config.TRAIN_BN) + + # TODO: clean up (use tf.identify if necessary) + output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois) + + # Losses + rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")( + [input_rpn_match, rpn_class_logits]) + rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")( + [input_rpn_bbox, input_rpn_match, rpn_bbox]) + class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")( + [target_class_ids, mrcnn_class_logits, active_class_ids]) + bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")( + [target_bbox, target_class_ids, mrcnn_bbox]) + mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")( + [target_mask, target_class_ids, mrcnn_mask]) + + # Model + inputs = [input_image, input_image_meta, + input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks] + if not config.USE_RPN_ROIS: + inputs.append(input_rois) + outputs = [rpn_class_logits, rpn_class, rpn_bbox, + mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask, + rpn_rois, output_rois, + rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss] + model = KM.Model(inputs, outputs, name='mask_rcnn') + else: + # Network Heads + # Proposal classifier and BBox regressor heads + mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ + fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta, + config.POOL_SIZE, config.NUM_CLASSES, + train_bn=config.TRAIN_BN, + fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) + + # Detections + # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in + # normalized coordinates + detections = DetectionLayer(config, name="mrcnn_detection")( + [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta]) + + # Create masks for detections + detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections) + mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps, + input_image_meta, + config.MASK_POOL_SIZE, + config.NUM_CLASSES, + train_bn=config.TRAIN_BN) + + model = KM.Model([input_image, input_image_meta, input_anchors], + [detections, mrcnn_class, mrcnn_bbox, + mrcnn_mask, 
rpn_rois, rpn_class, rpn_bbox], + name='mask_rcnn') + + # Add multi-GPU support. + if config.GPU_COUNT > 1: + from mrcnn.parallel_model import ParallelModel + model = ParallelModel(model, config.GPU_COUNT) + + return model + + def find_last(self): + """Finds the last checkpoint file of the last trained model in the + model directory. + Returns: + The path of the last checkpoint file + """ + # Get directory names. Each directory corresponds to a model + dir_names = next(os.walk(self.model_dir))[1] + key = self.config.NAME.lower() + dir_names = filter(lambda f: f.startswith(key), dir_names) + dir_names = sorted(dir_names) + if not dir_names: + import errno + raise FileNotFoundError( + errno.ENOENT, + "Could not find model directory under {}".format(self.model_dir)) + # Pick last directory + dir_name = os.path.join(self.model_dir, dir_names[-1]) + # Find the last checkpoint + checkpoints = next(os.walk(dir_name))[2] + checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints) + checkpoints = sorted(checkpoints) + if not checkpoints: + import errno + raise FileNotFoundError( + errno.ENOENT, "Could not find weight files in {}".format(dir_name)) + checkpoint = os.path.join(dir_name, checkpoints[-1]) + return checkpoint + + def load_weights(self, filepath, by_name=False, exclude=None): + """Modified version of the corresponding Keras function with + the addition of multi-GPU support and the ability to exclude + some layers from loading. + exclude: list of layer names to exclude + """ + import h5py + # Conditional import to support versions of Keras before 2.2 + # TODO: remove in about 6 months (end of 2018) + try: + from keras.engine import saving + except ImportError: + # Keras before 2.2 used the 'topology' namespace. + from keras.engine import topology as saving + + if exclude: + by_name = True + + if h5py is None: + raise ImportError('`load_weights` requires h5py.') + f = h5py.File(filepath, mode='r') + if 'layer_names' not in f.attrs and 'model_weights' in f: + f = f['model_weights'] + + # In multi-GPU training, we wrap the model. Get layers + # of the inner model because they have the weights. + keras_model = self.keras_model + layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ + else keras_model.layers + + # Exclude some layers + if exclude: + layers = filter(lambda l: l.name not in exclude, layers) + + if by_name: + saving.load_weights_from_hdf5_group_by_name(f, layers) + else: + saving.load_weights_from_hdf5_group(f, layers) + if hasattr(f, 'close'): + f.close() + + # Update the log directory + self.set_log_dir(filepath) + + def get_imagenet_weights(self): + """Downloads ImageNet trained weights from Keras. + Returns path to weights file. + """ + from keras.utils.data_utils import get_file + TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\ + 'releases/download/v0.2/'\ + 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' + weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', + TF_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='a268eb855778b3df3c7506639542a6af') + return weights_path + + def compile(self, learning_rate, momentum): + """Gets the model ready for training. Adds losses, regularization, and + metrics. Then calls the Keras compile() function. 
+ """ + # Optimizer object + optimizer = keras.optimizers.SGD( + lr=learning_rate, momentum=momentum, + clipnorm=self.config.GRADIENT_CLIP_NORM) + # Add Losses + # First, clear previously set losses to avoid duplication + self.keras_model._losses = [] + self.keras_model._per_input_losses = {} + loss_names = [ + "rpn_class_loss", "rpn_bbox_loss", + "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"] + for name in loss_names: + layer = self.keras_model.get_layer(name) + if layer.output in self.keras_model.losses: + continue + loss = ( + tf.reduce_mean(layer.output, keepdims=True) + * self.config.LOSS_WEIGHTS.get(name, 1.)) + self.keras_model.add_loss(loss) + + # Add L2 Regularization + # Skip gamma and beta weights of batch normalization layers. + reg_losses = [ + keras.regularizers.l2(self.config.WEIGHT_DECAY)(w) / tf.cast(tf.size(w), tf.float32) + for w in self.keras_model.trainable_weights + if 'gamma' not in w.name and 'beta' not in w.name] + self.keras_model.add_loss(tf.add_n(reg_losses)) + + # Compile + self.keras_model.compile( + optimizer=optimizer, + loss=[None] * len(self.keras_model.outputs)) + + # Add metrics for losses + for name in loss_names: + if name in self.keras_model.metrics_names: + continue + layer = self.keras_model.get_layer(name) + self.keras_model.metrics_names.append(name) + loss = ( + tf.reduce_mean(layer.output, keepdims=True) + * self.config.LOSS_WEIGHTS.get(name, 1.)) + self.keras_model.metrics_tensors.append(loss) + + def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1): + """Sets model layers as trainable if their names match + the given regular expression. + """ + # Print message on the first call (but not on recursive calls) + if verbose > 0 and keras_model is None: + log("Selecting layers to train") + + keras_model = keras_model or self.keras_model + + # In multi-GPU training, we wrap the model. Get layers + # of the inner model because they have the weights. + layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ + else keras_model.layers + + for layer in layers: + # Is the layer a model? + if layer.__class__.__name__ == 'Model': + print("In model: ", layer.name) + self.set_trainable( + layer_regex, keras_model=layer, indent=indent + 4) + continue + + if not layer.weights: + continue + # Is it trainable? + trainable = bool(re.fullmatch(layer_regex, layer.name)) + # Update layer. If layer is a container, update inner layer. + if layer.__class__.__name__ == 'TimeDistributed': + layer.layer.trainable = trainable + else: + layer.trainable = trainable + # Print trainable layer names + if trainable and verbose > 0: + log("{}{:20} ({})".format(" " * indent, layer.name, + layer.__class__.__name__)) + + def set_log_dir(self, model_path=None): + """Sets the model log directory and epoch counter. + model_path: If None, or a format different from what this code uses + then set a new log directory and start epochs from 0. Otherwise, + extract the log directory and the epoch counter from the file + name. + """ + # Set date and epoch counter as if starting a new model + self.epoch = 0 + now = datetime.datetime.now() + + # If we have a model path with date and epochs use them + if model_path: + # Continue from we left of. 
Get epoch and date from the file name
+            # A sample model path might look like:
+            # \path\to\logs\coco20171029T2315\mask_rcnn_coco_0001.h5 (Windows)
+            # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 (Linux)
+            regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5"
+            m = re.match(regex, model_path)
+            if m:
+                now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
+                                        int(m.group(4)), int(m.group(5)))
+                # Epoch number in file is 1-based, and in Keras code it's 0-based.
+                # So, adjust for that then increment by one to start from the next epoch
+                self.epoch = int(m.group(6)) - 1 + 1
+                print('Re-starting from epoch %d' % self.epoch)
+
+        # Directory for training logs
+        self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format(
+            self.config.NAME.lower(), now))
+
+        # Path to save after each epoch. Include placeholders that get filled by Keras.
+        self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.h5".format(
+            self.config.NAME.lower()))
+        self.checkpoint_path = self.checkpoint_path.replace(
+            "*epoch*", "{epoch:04d}")
+
+    def train(self, train_dataset, val_dataset, learning_rate, epochs, layers,
+              augmentation=None, custom_callbacks=None, no_augmentation_sources=None):
+        """Train the model.
+        train_dataset, val_dataset: Training and validation Dataset objects.
+        learning_rate: The learning rate to train with
+        epochs: Number of training epochs. Note that previous training epochs
+            are considered to be done already, so this actually determines
+            the epochs to train in total rather than in this particular
+            call.
+        layers: Allows selecting which layers to train. It can be:
+            - A regular expression to match layer names to train
+            - One of these predefined values:
+              heads: The RPN, classifier and mask heads of the network
+              all: All the layers
+              3+: Train Resnet stage 3 and up
+              4+: Train Resnet stage 4 and up
+              5+: Train Resnet stage 5 and up
+        augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
+            augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
+            flips images right/left 50% of the time. You can pass complex
+            augmentations as well. This augmentation applies 50% of the
+            time, and when it does it flips images right/left half the time
+            and adds a Gaussian blur with a random sigma in range 0 to 5.
+                augmentation = imgaug.augmenters.Sometimes(0.5, [
+                    imgaug.augmenters.Fliplr(0.5),
+                    imgaug.augmenters.GaussianBlur(sigma=(0.0, 5.0))
+                ])
+        custom_callbacks: Optional. Add custom callbacks to be called
+            with the keras fit_generator method. Must be a list of type keras.callbacks.
+        no_augmentation_sources: Optional. List of sources to exclude for
+            augmentation. A source is a string that identifies a dataset and is
+            defined in the Dataset class.
+        """
+        assert self.mode == "training", "Create model in training mode."
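+        # Illustrative usage sketch (hypothetical dataset objects and values, not part
+        # of the original code):
+        #   model = MaskRCNN(mode="training", config=config, model_dir="logs")
+        #   model.load_weights(model.get_imagenet_weights(), by_name=True)
+        #   model.train(train_ds, val_ds, learning_rate=config.LEARNING_RATE,
+        #               epochs=20, layers="heads")   # warm up the heads first
+        #   model.train(train_ds, val_ds, learning_rate=config.LEARNING_RATE / 10,
+        #               epochs=40, layers="all")     # then fine-tune everything
+        # Because `epochs` is a running total, the second call trains epochs 21-40.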
+
+        # Pre-defined layer regular expressions
+        layer_regex = {
+            # all layers but the backbone
+            "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
+            # From a specific Resnet stage and up
+            "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
+            "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
+            "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
+            # All layers
+            "all": ".*",
+        }
+        if layers in layer_regex.keys():
+            layers = layer_regex[layers]
+
+        # Data generators
+        train_generator = data_generator(train_dataset, self.config, shuffle=True,
+                                         augmentation=augmentation,
+                                         batch_size=self.config.BATCH_SIZE,
+                                         no_augmentation_sources=no_augmentation_sources)
+        val_generator = data_generator(val_dataset, self.config, shuffle=True,
+                                       batch_size=self.config.BATCH_SIZE)
+
+        # Create log_dir if it does not exist
+        if not os.path.exists(self.log_dir):
+            os.makedirs(self.log_dir)
+
+        # Callbacks
+        callbacks = [
+            keras.callbacks.TensorBoard(log_dir=self.log_dir,
+                                        histogram_freq=0, write_graph=True, write_images=False),
+            keras.callbacks.ModelCheckpoint(self.checkpoint_path,
+                                            verbose=0, save_weights_only=True),
+        ]
+
+        # Add custom callbacks to the list
+        if custom_callbacks:
+            callbacks += custom_callbacks
+
+        # Train
+        log("\nStarting at epoch {}. LR={}\n".format(self.epoch, learning_rate))
+        log("Checkpoint Path: {}".format(self.checkpoint_path))
+        self.set_trainable(layers)
+        self.compile(learning_rate, self.config.LEARNING_MOMENTUM)
+
+        # Work-around for Windows: Keras fails on Windows when using
+        # multiprocessing workers. See discussion here:
+        # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009
+        if os.name == 'nt':
+            workers = 0
+        else:
+            workers = multiprocessing.cpu_count()
+
+        self.keras_model.fit_generator(
+            train_generator,
+            initial_epoch=self.epoch,
+            epochs=epochs,
+            steps_per_epoch=self.config.STEPS_PER_EPOCH,
+            callbacks=callbacks,
+            validation_data=val_generator,
+            validation_steps=self.config.VALIDATION_STEPS,
+            max_queue_size=100,
+            workers=workers,
+            use_multiprocessing=True,
+        )
+        self.epoch = max(self.epoch, epochs)
+
+    def mold_inputs(self, images):
+        """Takes a list of images and modifies them to the format expected
+        as an input to the neural network.
+        images: List of image matrices [height,width,depth]. Images can have
+            different sizes.
+        Returns 3 Numpy matrices:
+        molded_images: [N, h, w, 3]. Images resized and normalized.
+        image_metas: [N, length of meta data]. Details about each image.
+        windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the
+            original image (padding excluded).
+ """ + molded_images = [] + image_metas = [] + windows = [] + for image in images: + # Resize image + # TODO: move resizing to mold_image() + molded_image, window, scale, padding, crop = utils.resize_image( + image, + min_dim=self.config.IMAGE_MIN_DIM, + min_scale=self.config.IMAGE_MIN_SCALE, + max_dim=self.config.IMAGE_MAX_DIM, + mode=self.config.IMAGE_RESIZE_MODE) + molded_image = mold_image(molded_image, self.config) + # Build image_meta + image_meta = compose_image_meta( + 0, image.shape, molded_image.shape, window, scale, + np.zeros([self.config.NUM_CLASSES], dtype=np.int32)) + # Append + molded_images.append(molded_image) + windows.append(window) + image_metas.append(image_meta) + # Pack into arrays + molded_images = np.stack(molded_images) + image_metas = np.stack(image_metas) + windows = np.stack(windows) + return molded_images, image_metas, windows + + def unmold_detections(self, detections, mrcnn_mask, original_image_shape, + image_shape, window): + """Reformats the detections of one image from the format of the neural + network output to a format suitable for use in the rest of the + application. + detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates + mrcnn_mask: [N, height, width, num_classes] + original_image_shape: [H, W, C] Original image shape before resizing + image_shape: [H, W, C] Shape of the image after resizing and padding + window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real + image is excluding the padding. + Returns: + boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels + class_ids: [N] Integer class IDs for each bounding box + scores: [N] Float probability scores of the class_id + masks: [height, width, num_instances] Instance masks + """ + # How many detections do we have? + # Detections array is padded with zeros. Find the first class_id == 0. + zero_ix = np.where(detections[:, 4] == 0)[0] + N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] + + # Extract boxes, class_ids, scores, and class-specific masks + boxes = detections[:N, :4] + class_ids = detections[:N, 4].astype(np.int32) + scores = detections[:N, 5] + masks = mrcnn_mask[np.arange(N), :, :, class_ids] + + # Translate normalized coordinates in the resized image to pixel + # coordinates in the original image before resizing + window = utils.norm_boxes(window, image_shape[:2]) + wy1, wx1, wy2, wx2 = window + shift = np.array([wy1, wx1, wy1, wx1]) + wh = wy2 - wy1 # window height + ww = wx2 - wx1 # window width + scale = np.array([wh, ww, wh, ww]) + # Convert boxes to normalized coordinates on the window + boxes = np.divide(boxes - shift, scale) + # Convert boxes to pixel coordinates on the original image + boxes = utils.denorm_boxes(boxes, original_image_shape[:2]) + + # Filter out detections with zero area. Happens in early training when + # network weights are still random + exclude_ix = np.where( + (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] + if exclude_ix.shape[0] > 0: + boxes = np.delete(boxes, exclude_ix, axis=0) + class_ids = np.delete(class_ids, exclude_ix, axis=0) + scores = np.delete(scores, exclude_ix, axis=0) + masks = np.delete(masks, exclude_ix, axis=0) + N = class_ids.shape[0] + + # Resize masks to original image size and set boundary threshold. 
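+        # Illustrative note: utils.unmold_mask (called below) resizes each low-resolution
+        # mask from the mask head (e.g. 28x28 with the default MASK_SHAPE) to its box in
+        # the original image and thresholds it at 0.5, so full_masks ends up as a boolean
+        # [H, W, N] stack.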
+ full_masks = [] + for i in range(N): + # Convert neural network mask to full size mask + full_mask = utils.unmold_mask(masks[i], boxes[i], original_image_shape) + full_masks.append(full_mask) + full_masks = np.stack(full_masks, axis=-1)\ + if full_masks else np.empty(original_image_shape[:2] + (0,)) + + return boxes, class_ids, scores, full_masks + + def detect(self, images, verbose=0): + """Runs the detection pipeline. + images: List of images, potentially of different sizes. + Returns a list of dicts, one dict per image. The dict contains: + rois: [N, (y1, x1, y2, x2)] detection bounding boxes + class_ids: [N] int class IDs + scores: [N] float probability scores for the class IDs + masks: [H, W, N] instance binary masks + """ + assert self.mode == "inference", "Create model in inference mode." + assert len( + images) == self.config.BATCH_SIZE, "len(images) must be equal to BATCH_SIZE" + + if verbose: + log("Processing {} images".format(len(images))) + for image in images: + log("image", image) + + # Mold inputs to format expected by the neural network + molded_images, image_metas, windows = self.mold_inputs(images) + + # Validate image sizes + # All images in a batch MUST be of the same size + image_shape = molded_images[0].shape + for g in molded_images[1:]: + assert g.shape == image_shape,\ + "After resizing, all images must have the same size. Check IMAGE_RESIZE_MODE and image sizes." + + # Anchors + anchors = self.get_anchors(image_shape) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) + + if verbose: + log("molded_images", molded_images) + log("image_metas", image_metas) + log("anchors", anchors) + # Run object detection + detections, _, _, mrcnn_mask, _, _, _ =\ + self.keras_model.predict([molded_images, image_metas, anchors], verbose=0) + # Process detections + results = [] + for i, image in enumerate(images): + final_rois, final_class_ids, final_scores, final_masks =\ + self.unmold_detections(detections[i], mrcnn_mask[i], + image.shape, molded_images[i].shape, + windows[i]) + results.append({ + "rois": final_rois, + "class_ids": final_class_ids, + "scores": final_scores, + "masks": final_masks, + }) + return results + + def detect_molded(self, molded_images, image_metas, verbose=0): + """Runs the detection pipeline, but expect inputs that are + molded already. Used mostly for debugging and inspecting + the model. + molded_images: List of images loaded using load_image_gt() + image_metas: image meta data, also returned by load_image_gt() + Returns a list of dicts, one dict per image. The dict contains: + rois: [N, (y1, x1, y2, x2)] detection bounding boxes + class_ids: [N] int class IDs + scores: [N] float probability scores for the class IDs + masks: [H, W, N] instance binary masks + """ + assert self.mode == "inference", "Create model in inference mode." 
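+        # Illustrative usage sketch (hypothetical names, not part of the original code):
+        #   model = MaskRCNN(mode="inference", config=inference_config, model_dir="logs")
+        #   model.load_weights(model.find_last(), by_name=True)
+        #   r = model.detect([rgb_image], verbose=0)[0]   # rgb_image: [H, W, 3] array
+        #   # r["rois"], r["class_ids"], r["scores"], r["masks"]
+        # This method is the already-molded variant of detect(): it expects inputs that
+        # were produced by load_image_gt()/mold_image() rather than raw images.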
+ assert len(molded_images) == self.config.BATCH_SIZE,\ + "Number of images must be equal to BATCH_SIZE" + + if verbose: + log("Processing {} images".format(len(molded_images))) + for image in molded_images: + log("image", image) + + # Validate image sizes + # All images in a batch MUST be of the same size + image_shape = molded_images[0].shape + for g in molded_images[1:]: + assert g.shape == image_shape, "Images must have the same size" + + # Anchors + anchors = self.get_anchors(image_shape) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) + + if verbose: + log("molded_images", molded_images) + log("image_metas", image_metas) + log("anchors", anchors) + # Run object detection + detections, _, _, mrcnn_mask, _, _, _ =\ + self.keras_model.predict([molded_images, image_metas, anchors], verbose=0) + # Process detections + results = [] + for i, image in enumerate(molded_images): + window = [0, 0, image.shape[0], image.shape[1]] + final_rois, final_class_ids, final_scores, final_masks =\ + self.unmold_detections(detections[i], mrcnn_mask[i], + image.shape, molded_images[i].shape, + window) + results.append({ + "rois": final_rois, + "class_ids": final_class_ids, + "scores": final_scores, + "masks": final_masks, + }) + return results + + def get_anchors(self, image_shape): + """Returns anchor pyramid for the given image size.""" + backbone_shapes = compute_backbone_shapes(self.config, image_shape) + # Cache anchors and reuse if image shape is the same + if not hasattr(self, "_anchor_cache"): + self._anchor_cache = {} + if not tuple(image_shape) in self._anchor_cache: + # Generate Anchors + a = utils.generate_pyramid_anchors( + self.config.RPN_ANCHOR_SCALES, + self.config.RPN_ANCHOR_RATIOS, + backbone_shapes, + self.config.BACKBONE_STRIDES, + self.config.RPN_ANCHOR_STRIDE) + # Keep a copy of the latest anchors in pixel coordinates because + # it's used in inspect_model notebooks. + # TODO: Remove this after the notebook are refactored to not use it + self.anchors = a + # Normalize coordinates + self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(a, image_shape[:2]) + return self._anchor_cache[tuple(image_shape)] + + def ancestor(self, tensor, name, checked=None): + """Finds the ancestor of a TF tensor in the computation graph. + tensor: TensorFlow symbolic tensor. + name: Name of ancestor tensor to find + checked: For internal use. A list of tensors that were already + searched to avoid loops in traversing the graph. + """ + checked = checked if checked is not None else [] + # Put a limit on how deep we go to avoid very long loops + if len(checked) > 500: + return None + # Convert name to a regex and allow matching a number prefix + # because Keras adds them automatically + if isinstance(name, str): + name = re.compile(name.replace("/", r"(\_\d+)*/")) + + parents = tensor.op.inputs + for p in parents: + if p in checked: + continue + if bool(re.fullmatch(name, p.name)): + return p + checked.append(p) + a = self.ancestor(p, name, checked) + if a is not None: + return a + return None + + def find_trainable_layer(self, layer): + """If a layer is encapsulated by another layer, this function + digs through the encapsulation and returns the layer that holds + the weights. 
+ """ + if layer.__class__.__name__ == 'TimeDistributed': + return self.find_trainable_layer(layer.layer) + return layer + + def get_trainable_layers(self): + """Returns a list of layers that have weights.""" + layers = [] + # Loop through all layers + for l in self.keras_model.layers: + # If layer is a wrapper, find inner trainable layer + l = self.find_trainable_layer(l) + # Include layer if it has weights + if l.get_weights(): + layers.append(l) + return layers + + def run_graph(self, images, outputs, image_metas=None): + """Runs a sub-set of the computation graph that computes the given + outputs. + image_metas: If provided, the images are assumed to be already + molded (i.e. resized, padded, and normalized) + outputs: List of tuples (name, tensor) to compute. The tensors are + symbolic TensorFlow tensors and the names are for easy tracking. + Returns an ordered dict of results. Keys are the names received in the + input and values are Numpy arrays. + """ + model = self.keras_model + + # Organize desired outputs into an ordered dict + outputs = OrderedDict(outputs) + for o in outputs.values(): + assert o is not None + + # Build a Keras function to run parts of the computation graph + inputs = model.inputs + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + inputs += [K.learning_phase()] + kf = K.function(model.inputs, list(outputs.values())) + + # Prepare inputs + if image_metas is None: + molded_images, image_metas, _ = self.mold_inputs(images) + else: + molded_images = images + image_shape = molded_images[0].shape + # Anchors + anchors = self.get_anchors(image_shape) + # Duplicate across the batch dimension because Keras requires it + # TODO: can this be optimized to avoid duplicating the anchors? + anchors = np.broadcast_to(anchors, (self.config.BATCH_SIZE,) + anchors.shape) + model_in = [molded_images, image_metas, anchors] + + # Run inference + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + model_in.append(0.) + outputs_np = kf(model_in) + + # Pack the generated Numpy arrays into a a dict and log the results. + outputs_np = OrderedDict([(k, v) + for k, v in zip(outputs.keys(), outputs_np)]) + for k, v in outputs_np.items(): + log(k, v) + return outputs_np + + +############################################################ +# Data Formatting +############################################################ + +def compose_image_meta(image_id, original_image_shape, image_shape, + window, scale, active_class_ids): + """Takes attributes of an image and puts them in one 1D array. + image_id: An int ID of the image. Useful for debugging. + original_image_shape: [H, W, C] before resizing or padding. + image_shape: [H, W, C] after resizing and padding + window: (y1, x1, y2, x2) in pixels. The area of the image where the real + image is (excluding the padding) + scale: The scaling factor applied to the original image (float32) + active_class_ids: List of class_ids available in the dataset from which + the image came. Useful if training on images from multiple datasets + where not all classes are present in all datasets. + """ + meta = np.array( + [image_id] + # size=1 + list(original_image_shape) + # size=3 + list(image_shape) + # size=3 + list(window) + # size=4 (y1, x1, y2, x2) in image cooredinates + [scale] + # size=1 + list(active_class_ids) # size=num_classes + ) + return meta + + +def parse_image_meta(meta): + """Parses an array that contains image attributes to its components. + See compose_image_meta() for more details. 
+ meta: [batch, meta length] where meta length depends on NUM_CLASSES + Returns a dict of the parsed values. + """ + image_id = meta[:, 0] + original_image_shape = meta[:, 1:4] + image_shape = meta[:, 4:7] + window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels + scale = meta[:, 11] + active_class_ids = meta[:, 12:] + return { + "image_id": image_id.astype(np.int32), + "original_image_shape": original_image_shape.astype(np.int32), + "image_shape": image_shape.astype(np.int32), + "window": window.astype(np.int32), + "scale": scale.astype(np.float32), + "active_class_ids": active_class_ids.astype(np.int32), + } + + +def parse_image_meta_graph(meta): + """Parses a tensor that contains image attributes to its components. + See compose_image_meta() for more details. + meta: [batch, meta length] where meta length depends on NUM_CLASSES + Returns a dict of the parsed tensors. + """ + image_id = meta[:, 0] + original_image_shape = meta[:, 1:4] + image_shape = meta[:, 4:7] + window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels + scale = meta[:, 11] + active_class_ids = meta[:, 12:] + return { + "image_id": image_id, + "original_image_shape": original_image_shape, + "image_shape": image_shape, + "window": window, + "scale": scale, + "active_class_ids": active_class_ids, + } + + +def mold_image(images, config): + """Expects an RGB image (or array of images) and subtracts + the mean pixel and converts it to float. Expects image + colors in RGB order. + """ + return images.astype(np.float32) - config.MEAN_PIXEL + + +def unmold_image(normalized_images, config): + """Takes a image normalized with mold() and returns the original.""" + return (normalized_images + config.MEAN_PIXEL).astype(np.uint8) + + +############################################################ +# Miscellenous Graph Functions +############################################################ + +def trim_zeros_graph(boxes, name='trim_zeros'): + """Often boxes are represented with matrices of shape [N, 4] and + are padded with zeros. This removes zero boxes. + boxes: [N, 4] matrix of boxes. + non_zeros: [N] a 1D boolean mask identifying the rows to keep + """ + non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool) + boxes = tf.boolean_mask(boxes, non_zeros, name=name) + return boxes, non_zeros + + +def batch_pack_graph(x, counts, num_rows): + """Picks different number of values from each row + in x depending on the values in counts. + """ + outputs = [] + for i in range(num_rows): + outputs.append(x[i, :counts[i]]) + return tf.concat(outputs, axis=0) + + +def norm_boxes_graph(boxes, shape): + """Converts boxes from pixel coordinates to normalized coordinates. + boxes: [..., (y1, x1, y2, x2)] in pixel coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. + Returns: + [..., (y1, x1, y2, x2)] in normalized coordinates + """ + h, w = tf.split(tf.cast(shape, tf.float32), 2) + scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) + shift = tf.constant([0., 0., 1., 1.]) + return tf.divide(boxes - shift, scale) + + +def denorm_boxes_graph(boxes, shape): + """Converts boxes from normalized coordinates to pixel coordinates. + boxes: [..., (y1, x1, y2, x2)] in normalized coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. 
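norm_boxes_graph() and denorm_boxes_graph() encode the convention that (y2, x2) is exclusive in pixel coordinates but inclusive in normalized coordinates. The standalone NumPy round trip below uses the same shift/scale formulas to make the off-by-one handling concrete; it does not call the graph functions above.

```python
import numpy as np

def norm_boxes(boxes, shape):
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.divide(boxes - shift, scale).astype(np.float32)

def denorm_boxes(boxes, shape):
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)

boxes_px = np.array([[10, 20, 110, 220]])       # (y2, x2) are exclusive here
shape = (480, 640)

boxes_norm = norm_boxes(boxes_px, shape)
print(boxes_norm)                               # all values land in [0, 1]
print(denorm_boxes(boxes_norm, shape))          # round-trips back to [[10 20 110 220]]
```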
+ Returns: + [..., (y1, x1, y2, x2)] in pixel coordinates + """ + h, w = tf.split(tf.cast(shape, tf.float32), 2) + scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) + shift = tf.constant([0., 0., 1., 1.]) + return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32) \ No newline at end of file diff --git a/src/tracker/mrcnn/mrcnn_color.py b/src/tracker/mrcnn/mrcnn_color.py new file mode 100644 index 0000000000000000000000000000000000000000..8cacce7810e4328e026dd63cd08529bb965c2441 --- /dev/null +++ b/src/tracker/mrcnn/mrcnn_color.py @@ -0,0 +1,106 @@ +from src.tracker.mrcnn.config import Config +import src.tracker.mrcnn.model as modellib +import numpy as np +import random +import colorsys +import pytorch_lightning as pl + +from src.tracker.signboard_segment.det_models.model import POIDetection +from src.tracker.signboard_segment.datasets_signboard_detection.datamodule import POIDataModule +from src.tracker.signboard_segment.det_models.inference_signboard_detection import POIDetectionTask + +COCO_CLASSES = ['BG', 'signboard'] + + +class InferenceConfig(Config): + NAME = "my_inference" + + GPU_COUNT = 1 + IMAGES_PER_GPU = 1 + + NUM_CLASSES = 1 + 1 # background + 80 shapes + + +class MRCNN(object): + def __init__(self, model_path, image_size, min_score): + self.gpu_num = 1 + self.image_size = image_size + self.score = min_score + self.class_names = COCO_CLASSES + self.colors = self._random_colors(len(COCO_CLASSES)) + self.model_ = self._model_load_(model_path) + self.task = POIDetectionTask(self.model_) + self.trainer = pl.Trainer(gpus=0, max_epochs=-1) + + def _model_load_(self, model_path="checkpoint/signboard_ss.ckpt"): + model = POIDetection.load_from_checkpoint(checkpoint_path=model_path) + return model + + def _random_colors(self, N): + hsv_tuples = [(1.0 * x / N, 1., 1.) 
for x in range(N)] + colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) + colors = list( + map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) + random.seed(10) + random.shuffle(colors) + random.seed(None) + return colors + + def apply_mask(self, image, mask, color, alpha=0.4): + for n, c in enumerate(color): + image[:, :, n] = np.where( + mask == 1, + image[:, :, n] * (1 - alpha) + alpha * c, + image[:, :, n] + ) + return image + + def detect_result_(self, image, min_score=0.2): + dm = POIDataModule(data=image, seed=42) + dm.setup("predict") + + self.trainer.predict(self.task, datamodule=dm) + + results = self.task.output + boxes = results['rois'] + masks = results['masks'] + class_ids = results['class_ids'] + classes_scores = results['scores'] + + return_boxes = [] + return_scores = [] + return_masks = [] + return_class_names = [] + return_class_ids = [] + return_class_color = [] + + for i, box in enumerate(boxes): + class_id = class_ids[i] + classes_score = classes_scores[i] + + if classes_score < min_score: + continue + + x1, y1, x2, y2 = box + return_boxes.append([x1, y1, (x2 - x1), (y2 - y1)]) + return_scores.append(classes_score) + return_masks.append(masks[i]) + # print(masks[i].max()) + return_class_names.append(self.class_names[class_id]) + return_class_ids.append(class_id) + return_class_color.append(self.colors[class_id]) + + return_boxes = np.array(return_boxes) + return_scores = np.array(return_scores) + return_class_names = np.array(return_class_names) + return_class_ids = np.array(return_class_ids) + # return_masks = np.array(return_masks) + return_class_color = np.array(return_class_color) + + # return return_boxes, return_scores, return_class_names, return_class_ids, return_masks, return_class_color + return return_boxes, return_scores, return_class_names, return_class_ids, return_class_color + + +def isInSide(point, box): + # print(box[0] <= point[0] <= box[2] , box[1] <= point[1] <= box[3]) + return box[0] <= point[0] <= box[2] and box[1] <= point[1] <= box[3] diff --git a/src/tracker/mrcnn/mrcnn_colors.py b/src/tracker/mrcnn/mrcnn_colors.py new file mode 100644 index 0000000000000000000000000000000000000000..2eba2c208e44003579b86de496874d0e8be17e05 --- /dev/null +++ b/src/tracker/mrcnn/mrcnn_colors.py @@ -0,0 +1,105 @@ +from mrcnn.config import Config +import mrcnn.model as modellib +import numpy as np +import cv2 +import random +import base64 +import colorsys +import os + +COCO_CLASSES = ['signboard'] + +class InferenceConfig(Config): + NAME = "my_inference" + + GPU_COUNT = 1 + IMAGES_PER_GPU = 1 + + NUM_CLASSES = 1 + 1 # background + 80 shapes + +class MRCNN(object): + def __init__(self, model_path, image_size, min_score): + self.gpu_num = 1 + self.image_size = image_size + self.score = min_score + self.class_names = COCO_CLASSES + self.model = self._model_load(model_path) + + def _model_load(self, model_path="model_data/mask_rcnn_coco.h5"): + inference_config = InferenceConfig() + inference_config.IMAGE_MIN_DIM = self.image_size + inference_config.IMAGE_MAX_DIM = self.image_size + inference_config.display() + + inference_model = modellib.MaskRCNN(mode="inference", + config=inference_config, + model_dir="logs") + + inference_model.load_weights(model_path, by_name=True) + inference_model.keras_model._make_predict_function() + + return inference_model + + def random_colors(self, N, bright=True): + """ + Generate random colors. + To get visually distinct colors, generate them in HSV space then + convert to RGB. 
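apply_mask() above blends a flat colour into the frame only where the binary mask is 1, one channel at a time. A self-contained NumPy version of that blend on a toy image looks like this (illustrative values only):

```python
import numpy as np

def apply_mask(image, mask, color, alpha=0.4):
    """Blend `color` into `image` wherever mask == 1 (same idea as above)."""
    out = image.astype(np.float32).copy()
    for c, channel_color in enumerate(color):
        out[:, :, c] = np.where(mask == 1,
                                out[:, :, c] * (1 - alpha) + alpha * channel_color,
                                out[:, :, c])
    return out.astype(np.uint8)

image = np.full((4, 4, 3), 200, dtype=np.uint8)    # light grey toy image
mask = np.zeros((4, 4), dtype=np.uint8)
mask[1:3, 1:3] = 1                                 # 2x2 masked region
print(apply_mask(image, mask, color=(255, 0, 0)))  # masked pixels shift toward red
```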
+ """ + brightness = 1.0 if bright else 0.7 + hsv = [(i / N, 1, brightness) for i in range(N)] + colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) + random.shuffle(colors) + return colors + + def apply_mask(self, image, mask, color, alpha=0.5): + """ + Apply the given mask to the image. + """ + for c in range(3): + image[:, :, c] = np.where( + mask == 1, + image[:, :, c] * (1 - alpha) + alpha * color[c] * 255, + image[:, :, c] + ) + return image + + def detect_result(self, image, min_score=0.2): + results = self.model.detect([image], verbose=0)[0] + boxes = results['rois'] + masks = results['masks'] + class_ids = results['class_ids'] + classes_scores = results['scores'] + + N = boxes.shape[0] + if not N: + print("\n*** No instances to display *** \n") + else: + assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] + + colors = self.random_colors(N) + + return_boxes = [] + return_scores = [] + return_masks = [] + return_class_names = [] + return_class_color = [] + + for i in range(N): + class_id = class_ids[i] + classes_score = classes_scores[i] + + if classes_score < min_score: continue + + return_scores.append(classes_score) + y1, x1, y2, x2 = boxes[i] + return_boxes.append([x1, y1, (x2 - x1), (y2 - y1)]) + return_masks.append(masks[:, :, i]) + return_class_names.append(COCO_CLASSES[class_id]) + return_class_color.append(colors[i]) + + return return_boxes, return_scores, return_class_names, return_masks, return_class_color + +def isInSide(point, box): + # print(box[0] <= point[0] <= box[2] , box[1] <= point[1] <= box[3]) + return box[0] <= point[0] <= box[2] and box[1] <= point[1] <= box[3] \ No newline at end of file diff --git a/src/tracker/mrcnn/utils.py b/src/tracker/mrcnn/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..696215ee01ef200b820bb3d8dde8aec3303608e6 --- /dev/null +++ b/src/tracker/mrcnn/utils.py @@ -0,0 +1,879 @@ +""" +Mask R-CNN +Common utility functions and classes. +Copyright (c) 2017 Matterport, Inc. +Licensed under the MIT License (see LICENSE for details) +Written by Waleed Abdulla +""" + +import sys +import os +import logging +import math +import random +import numpy as np +import tensorflow as tf +import scipy +import skimage.color +import skimage.io +import skimage.transform +import urllib.request +import shutil +import warnings +from distutils.version import LooseVersion + +# URL from which to download the latest COCO trained weights +COCO_MODEL_URL = "https://modelarts-maskrcnn.obs.cn-north-1.myhuaweicloud.com/data/mask_rcnn_coco.h5" + + +############################################################ +# Bounding Boxes +############################################################ + +def extract_bboxes(mask): + """Compute bounding boxes from masks. + mask: [height, width, num_instances]. Mask pixels are either 1 or 0. + Returns: bbox array [num_instances, (y1, x1, y2, x2)]. + """ + boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) + for i in range(mask.shape[-1]): + m = mask[:, :, i] + # Bounding box. + horizontal_indicies = np.where(np.any(m, axis=0))[0] + vertical_indicies = np.where(np.any(m, axis=1))[0] + if horizontal_indicies.shape[0]: + x1, x2 = horizontal_indicies[[0, -1]] + y1, y2 = vertical_indicies[[0, -1]] + # x2 and y2 should not be part of the box. Increment by 1. + x2 += 1 + y2 += 1 + else: + # No mask for this instance. Might happen due to + # resizing or cropping. 
Set bbox to zeros + x1, x2, y1, y2 = 0, 0, 0, 0 + boxes[i] = np.array([y1, x1, y2, x2]) + return boxes.astype(np.int32) + + +def compute_iou(box, boxes, box_area, boxes_area): + """Calculates IoU of the given box with the array of the given boxes. + box: 1D vector [y1, x1, y2, x2] + boxes: [boxes_count, (y1, x1, y2, x2)] + box_area: float. the area of 'box' + boxes_area: array of length boxes_count. + Note: the areas are passed in rather than calculated here for + efficiency. Calculate once in the caller to avoid duplicate work. + """ + # Calculate intersection areas + y1 = np.maximum(box[0], boxes[:, 0]) + y2 = np.minimum(box[2], boxes[:, 2]) + x1 = np.maximum(box[1], boxes[:, 1]) + x2 = np.minimum(box[3], boxes[:, 3]) + intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) + union = box_area + boxes_area[:] - intersection[:] + iou = intersection / union + return iou + + +def compute_overlaps(boxes1, boxes2): + """Computes IoU overlaps between two sets of boxes. + boxes1, boxes2: [N, (y1, x1, y2, x2)]. + For better performance, pass the largest set first and the smaller second. + """ + # Areas of anchors and GT boxes + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) + + # Compute overlaps to generate matrix [boxes1 count, boxes2 count] + # Each cell contains the IoU value. + overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) + for i in range(overlaps.shape[1]): + box2 = boxes2[i] + overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) + return overlaps + + +def compute_overlaps_masks(masks1, masks2): + """Computes IoU overlaps between two sets of masks. + masks1, masks2: [Height, Width, instances] + """ + + # If either set of masks is empty return empty result + if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: + return np.zeros((masks1.shape[-1], masks2.shape[-1])) + # flatten masks and compute their areas + masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) + masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) + area1 = np.sum(masks1, axis=0) + area2 = np.sum(masks2, axis=0) + + # intersections and union + intersections = np.dot(masks1.T, masks2) + union = area1[:, None] + area2[None, :] - intersections + overlaps = intersections / union + + return overlaps + + +def non_max_suppression(boxes, scores, threshold): + """Performs non-maximum suppression and returns indices of kept boxes. + boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. + scores: 1-D array of box scores. + threshold: Float. IoU threshold to use for filtering. + """ + assert boxes.shape[0] > 0 + if boxes.dtype.kind != "f": + boxes = boxes.astype(np.float32) + + # Compute box areas + y1 = boxes[:, 0] + x1 = boxes[:, 1] + y2 = boxes[:, 2] + x2 = boxes[:, 3] + area = (y2 - y1) * (x2 - x1) + + # Get indicies of boxes sorted by scores (highest first) + ixs = scores.argsort()[::-1] + + pick = [] + while len(ixs) > 0: + # Pick top box and add its index to the list + i = ixs[0] + pick.append(i) + # Compute IoU of the picked box with the rest + iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) + # Identify boxes with IoU over the threshold. This + # returns indices into ixs[1:], so add 1 to get + # indices into ixs. + remove_ixs = np.where(iou > threshold)[0] + 1 + # Remove indices of the picked and overlapped boxes. 
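As a quick sanity check of compute_iou() above, the vectorised intersection-over-union arithmetic on a few toy boxes works out as follows (standalone sketch, same formulas):

```python
import numpy as np

def compute_iou(box, boxes, box_area, boxes_area):
    """Same vectorised IoU as above: one box against an array of boxes."""
    y1 = np.maximum(box[0], boxes[:, 0])
    y2 = np.minimum(box[2], boxes[:, 2])
    x1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[3], boxes[:, 3])
    intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
    union = box_area + boxes_area - intersection
    return intersection / union

box = np.array([0, 0, 10, 10])                        # 10x10 reference box
boxes = np.array([[0, 0, 10, 10],                     # identical   -> IoU 1.0
                  [5, 5, 15, 15],                     # 25/175 overlap -> ~0.143
                  [20, 20, 30, 30]])                  # disjoint    -> 0.0
box_area = (box[2] - box[0]) * (box[3] - box[1])
boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
print(compute_iou(box, boxes, box_area, boxes_area))  # [1.0 0.1428... 0.0]
```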
+ ixs = np.delete(ixs, remove_ixs) + ixs = np.delete(ixs, 0) + return np.array(pick, dtype=np.int32) + + +def apply_box_deltas(boxes, deltas): + """Applies the given deltas to the given boxes. + boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. + deltas: [N, (dy, dx, log(dh), log(dw))] + """ + boxes = boxes.astype(np.float32) + # Convert to y, x, h, w + height = boxes[:, 2] - boxes[:, 0] + width = boxes[:, 3] - boxes[:, 1] + center_y = boxes[:, 0] + 0.5 * height + center_x = boxes[:, 1] + 0.5 * width + # Apply deltas + center_y += deltas[:, 0] * height + center_x += deltas[:, 1] * width + height *= np.exp(deltas[:, 2]) + width *= np.exp(deltas[:, 3]) + # Convert back to y1, x1, y2, x2 + y1 = center_y - 0.5 * height + x1 = center_x - 0.5 * width + y2 = y1 + height + x2 = x1 + width + return np.stack([y1, x1, y2, x2], axis=1) + + +def box_refinement_graph(box, gt_box): + """Compute refinement needed to transform box to gt_box. + box and gt_box are [N, (y1, x1, y2, x2)] + """ + box = tf.cast(box, tf.float32) + gt_box = tf.cast(gt_box, tf.float32) + + height = box[:, 2] - box[:, 0] + width = box[:, 3] - box[:, 1] + center_y = box[:, 0] + 0.5 * height + center_x = box[:, 1] + 0.5 * width + + gt_height = gt_box[:, 2] - gt_box[:, 0] + gt_width = gt_box[:, 3] - gt_box[:, 1] + gt_center_y = gt_box[:, 0] + 0.5 * gt_height + gt_center_x = gt_box[:, 1] + 0.5 * gt_width + + dy = (gt_center_y - center_y) / height + dx = (gt_center_x - center_x) / width + dh = tf.log(gt_height / height) + dw = tf.log(gt_width / width) + + result = tf.stack([dy, dx, dh, dw], axis=1) + return result + + +def box_refinement(box, gt_box): + """Compute refinement needed to transform box to gt_box. + box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is + assumed to be outside the box. + """ + box = box.astype(np.float32) + gt_box = gt_box.astype(np.float32) + + height = box[:, 2] - box[:, 0] + width = box[:, 3] - box[:, 1] + center_y = box[:, 0] + 0.5 * height + center_x = box[:, 1] + 0.5 * width + + gt_height = gt_box[:, 2] - gt_box[:, 0] + gt_width = gt_box[:, 3] - gt_box[:, 1] + gt_center_y = gt_box[:, 0] + 0.5 * gt_height + gt_center_x = gt_box[:, 1] + 0.5 * gt_width + + dy = (gt_center_y - center_y) / height + dx = (gt_center_x - center_x) / width + dh = np.log(gt_height / height) + dw = np.log(gt_width / width) + + return np.stack([dy, dx, dh, dw], axis=1) + + +############################################################ +# Dataset +############################################################ + +class Dataset(object): + """The base class for dataset classes. + To use it, create a new class that adds functions specific to the dataset + you want to use. For example: + class CatsAndDogsDataset(Dataset): + def load_cats_and_dogs(self): + ... + def load_mask(self, image_id): + ... + def image_reference(self, image_id): + ... + See COCODataset and ShapesDataset as examples. + """ + + def __init__(self, class_map=None): + self._image_ids = [] + self.image_info = [] + # Background is always the first class + self.class_info = [{"source": "", "id": 0, "name": "BG"}] + self.source_class_ids = {} + + def add_class(self, source, class_id, class_name): + assert "." not in source, "Source name cannot contain a dot" + # Does the class exist already? 
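apply_box_deltas() and box_refinement() above are exact inverses: the (dy, dx, log(dh), log(dw)) deltas computed from a proposal and a ground-truth box, when applied back to the proposal, recover the ground-truth box. A small standalone NumPy check of that round trip, with illustrative boxes:

```python
import numpy as np

def box_refinement(box, gt_box):
    """Deltas needed to move `box` onto `gt_box` (same formulas as above)."""
    box, gt_box = box.astype(np.float32), gt_box.astype(np.float32)
    h, w = box[:, 2] - box[:, 0], box[:, 3] - box[:, 1]
    cy, cx = box[:, 0] + 0.5 * h, box[:, 1] + 0.5 * w
    gh, gw = gt_box[:, 2] - gt_box[:, 0], gt_box[:, 3] - gt_box[:, 1]
    gcy, gcx = gt_box[:, 0] + 0.5 * gh, gt_box[:, 1] + 0.5 * gw
    return np.stack([(gcy - cy) / h, (gcx - cx) / w,
                     np.log(gh / h), np.log(gw / w)], axis=1)

def apply_box_deltas(boxes, deltas):
    """Apply (dy, dx, log(dh), log(dw)) deltas to boxes (same formulas as above)."""
    boxes = boxes.astype(np.float32)
    h, w = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]
    cy, cx = boxes[:, 0] + 0.5 * h, boxes[:, 1] + 0.5 * w
    cy, cx = cy + deltas[:, 0] * h, cx + deltas[:, 1] * w
    h, w = h * np.exp(deltas[:, 2]), w * np.exp(deltas[:, 3])
    y1, x1 = cy - 0.5 * h, cx - 0.5 * w
    return np.stack([y1, x1, y1 + h, x1 + w], axis=1)

proposal = np.array([[10., 10., 50., 30.]])
gt = np.array([[12., 8., 60., 40.]])
deltas = box_refinement(proposal, gt)
print(np.allclose(apply_box_deltas(proposal, deltas), gt))   # True
```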
+ for info in self.class_info: + if info['source'] == source and info["id"] == class_id: + # source.class_id combination already available, skip + return + # Add the class + self.class_info.append({ + "source": source, + "id": class_id, + "name": class_name, + }) + + def add_image(self, source, image_id, path, **kwargs): + image_info = { + "id": image_id, + "source": source, + "path": path, + } + image_info.update(kwargs) + self.image_info.append(image_info) + + def image_reference(self, image_id): + """Return a link to the image in its source Website or details about + the image that help looking it up or debugging it. + Override for your dataset, but pass to this function + if you encounter images not in your dataset. + """ + return "" + + def prepare(self, class_map=None): + """Prepares the Dataset class for use. + TODO: class map is not supported yet. When done, it should handle mapping + classes from different datasets to the same class ID. + """ + + def clean_name(name): + """Returns a shorter version of object names for cleaner display.""" + return ",".join(name.split(",")[:1]) + + # Build (or rebuild) everything else from the info dicts. + self.num_classes = len(self.class_info) + self.class_ids = np.arange(self.num_classes) + self.class_names = [clean_name(c["name"]) for c in self.class_info] + self.num_images = len(self.image_info) + self._image_ids = np.arange(self.num_images) + + # Mapping from source class and image IDs to internal IDs + self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id + for info, id in zip(self.class_info, self.class_ids)} + self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id + for info, id in zip(self.image_info, self.image_ids)} + + # Map sources to class_ids they support + self.sources = list(set([i['source'] for i in self.class_info])) + self.source_class_ids = {} + # Loop over datasets + for source in self.sources: + self.source_class_ids[source] = [] + # Find classes that belong to this dataset + for i, info in enumerate(self.class_info): + # Include BG class in all datasets + if i == 0 or source == info['source']: + self.source_class_ids[source].append(i) + + def map_source_class_id(self, source_class_id): + """Takes a source class ID and returns the int class ID assigned to it. + For example: + dataset.map_source_class_id("coco.12") -> 23 + """ + return self.class_from_source_map[source_class_id] + + def get_source_class_id(self, class_id, source): + """Map an internal class ID to the corresponding class ID in the source dataset.""" + info = self.class_info[class_id] + assert info['source'] == source + return info['id'] + + @property + def image_ids(self): + return self._image_ids + + def source_image_link(self, image_id): + """Returns the path or URL to the image. + Override this to return a URL to the image if it's available online for easy + debugging. + """ + return self.image_info[image_id]["path"] + + def load_image(self, image_id): + """Load the specified image and return a [H,W,3] Numpy array. + """ + # Load image + image = skimage.io.imread(self.image_info[image_id]['path']) + # If grayscale. Convert to RGB for consistency. + if image.ndim != 3: + image = skimage.color.gray2rgb(image) + # If has an alpha channel, remove it for consistency + if image.shape[-1] == 4: + image = image[..., :3] + return image + + def load_mask(self, image_id): + """Load instance masks for the given image. + Different datasets use different ways to store masks. 
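A minimal hypothetical subclass shows how the Dataset bookkeeping above fits together: register classes and images, call prepare(), then use the source-to-internal ID maps. The "signboard" source name and the image paths are placeholders, and the sketch assumes this file is importable as src.tracker.mrcnn.utils (the path it is added under in this diff).

```python
import numpy as np
from src.tracker.mrcnn.utils import Dataset

class SignboardDataset(Dataset):
    def load_signboards(self, image_paths):
        self.add_class("signboard", 1, "signboard")
        for i, path in enumerate(image_paths):
            self.add_image("signboard", image_id=i, path=path)

    def load_mask(self, image_id):
        # Real code would rasterise annotations here; return an empty mask set.
        return np.empty([0, 0, 0]), np.empty([0], np.int32)

dataset = SignboardDataset()
dataset.load_signboards(["data/img_000.jpg", "data/img_001.jpg"])   # placeholder paths
dataset.prepare()

print(dataset.num_classes, dataset.class_names)      # 2 ['BG', 'signboard']
print(dataset.map_source_class_id("signboard.1"))    # internal class ID 1
print(dataset.source_image_link(0))                  # data/img_000.jpg
```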
Override this + method to load instance masks and return them in the form of am + array of binary masks of shape [height, width, instances]. + Returns: + masks: A bool array of shape [height, width, instance count] with + a binary mask per instance. + class_ids: a 1D array of class IDs of the instance masks. + """ + # Override this function to load a mask from your dataset. + # Otherwise, it returns an empty mask. + logging.warning("You are using the default load_mask(), maybe you need to define your own one.") + mask = np.empty([0, 0, 0]) + class_ids = np.empty([0], np.int32) + return mask, class_ids + + +def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): + """Resizes an image keeping the aspect ratio unchanged. + min_dim: if provided, resizes the image such that it's smaller + dimension == min_dim + max_dim: if provided, ensures that the image longest side doesn't + exceed this value. + min_scale: if provided, ensure that the image is scaled up by at least + this percent even if min_dim doesn't require it. + mode: Resizing mode. + none: No resizing. Return the image unchanged. + square: Resize and pad with zeros to get a square image + of size [max_dim, max_dim]. + pad64: Pads width and height with zeros to make them multiples of 64. + If min_dim or min_scale are provided, it scales the image up + before padding. max_dim is ignored in this mode. + The multiple of 64 is needed to ensure smooth scaling of feature + maps up and down the 6 levels of the FPN pyramid (2**6=64). + crop: Picks random crops from the image. First, scales the image based + on min_dim and min_scale, then picks a random crop of + size min_dim x min_dim. Can be used in training only. + max_dim is not used in this mode. + Returns: + image: the resized image + window: (y1, x1, y2, x2). If max_dim is provided, padding might + be inserted in the returned image. If so, this window is the + coordinates of the image part of the full image (excluding + the padding). The x2, y2 pixels are not included. + scale: The scale factor used to resize the image + padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] + """ + # Keep track of image dtype and return results in the same dtype + image_dtype = image.dtype + # Default window (y1, x1, y2, x2) and default scale == 1. + h, w = image.shape[:2] + window = (0, 0, h, w) + scale = 1 + padding = [(0, 0), (0, 0), (0, 0)] + crop = None + + if mode == "none": + return image, window, scale, padding, crop + + # Scale? + if min_dim: + # Scale up but not down + scale = max(1, min_dim / min(h, w)) + if min_scale and scale < min_scale: + scale = min_scale + + # Does it exceed max dim? + if max_dim and mode == "square": + image_max = max(h, w) + if round(image_max * scale) > max_dim: + scale = max_dim / image_max + + # Resize image using bilinear interpolation + if scale != 1: + image = resize(image, (round(h * scale), round(w * scale)), + preserve_range=True) + + # Need padding or cropping? 
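To see what resize_image() returns in the common "square" mode, the sketch below feeds it a synthetic 480x640 image; it assumes this module is importable as src.tracker.mrcnn.utils, the path it is added under in this diff.

```python
# Illustrative call into resize_image(); the image is random noise.
import numpy as np
from src.tracker.mrcnn import utils

image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
molded, window, scale, padding, crop = utils.resize_image(
    image, min_dim=800, max_dim=1024, mode="square")

print(molded.shape)   # (1024, 1024, 3): scaled by 1024/640 = 1.6, then padded
print(window)         # (y1, x1, y2, x2) of the real image inside the padding
print(scale)          # 1.6
print(padding)        # [(top, bottom), (left, right), (0, 0)]
```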
+ if mode == "square": + # Get new height and width + h, w = image.shape[:2] + top_pad = (max_dim - h) // 2 + bottom_pad = max_dim - h - top_pad + left_pad = (max_dim - w) // 2 + right_pad = max_dim - w - left_pad + padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] + image = np.pad(image, padding, mode='constant', constant_values=0) + window = (top_pad, left_pad, h + top_pad, w + left_pad) + elif mode == "pad64": + h, w = image.shape[:2] + # Both sides must be divisible by 64 + assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" + # Height + if h % 64 > 0: + max_h = h - (h % 64) + 64 + top_pad = (max_h - h) // 2 + bottom_pad = max_h - h - top_pad + else: + top_pad = bottom_pad = 0 + # Width + if w % 64 > 0: + max_w = w - (w % 64) + 64 + left_pad = (max_w - w) // 2 + right_pad = max_w - w - left_pad + else: + left_pad = right_pad = 0 + padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] + image = np.pad(image, padding, mode='constant', constant_values=0) + window = (top_pad, left_pad, h + top_pad, w + left_pad) + elif mode == "crop": + # Pick a random crop + h, w = image.shape[:2] + y = random.randint(0, (h - min_dim)) + x = random.randint(0, (w - min_dim)) + crop = (y, x, min_dim, min_dim) + image = image[y:y + min_dim, x:x + min_dim] + window = (0, 0, min_dim, min_dim) + else: + raise Exception("Mode {} not supported".format(mode)) + return image.astype(image_dtype), window, scale, padding, crop + + +def resize_mask(mask, scale, padding, crop=None): + """Resizes a mask using the given scale and padding. + Typically, you get the scale and padding from resize_image() to + ensure both, the image and the mask, are resized consistently. + scale: mask scaling factor + padding: Padding to add to the mask in the form + [(top, bottom), (left, right), (0, 0)] + """ + # Suppress warning from scipy 0.13.0, the output shape of zoom() is + # calculated with round() instead of int() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) + if crop is not None: + y, x, h, w = crop + mask = mask[y:y + h, x:x + w] + else: + mask = np.pad(mask, padding, mode='constant', constant_values=0) + return mask + + +def minimize_mask(bbox, mask, mini_shape): + """Resize masks to a smaller version to reduce memory load. + Mini-masks can be resized back to image scale using expand_masks() + See inspect_data.ipynb notebook for more details. + """ + mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) + for i in range(mask.shape[-1]): + # Pick slice and cast to bool in case load_mask() returned wrong dtype + m = mask[:, :, i].astype(bool) + y1, x1, y2, x2 = bbox[i][:4] + m = m[y1:y2, x1:x2] + if m.size == 0: + raise Exception("Invalid bounding box with area of zero") + # Resize with bilinear interpolation + m = resize(m, mini_shape) + mini_mask[:, :, i] = np.around(m).astype(np.bool) + return mini_mask + + +def expand_mask(bbox, mini_mask, image_shape): + """Resizes mini masks back to image size. Reverses the change + of minimize_mask(). + See inspect_data.ipynb notebook for more details. 
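minimize_mask() and expand_mask() implement the mini-mask trick: crop each instance mask to its bounding box, shrink it to a fixed small shape to save memory, and resize it back later. The self-contained sketch below walks through that idea with plain skimage calls rather than the functions above.

```python
import numpy as np
from skimage.transform import resize

full = np.zeros((128, 128), dtype=bool)
full[40:80, 30:90] = True                  # instance occupies a 40x60 region
y1, x1, y2, x2 = 40, 30, 80, 90            # its bounding box

# Shrink the cropped mask to a fixed mini shape (here 56x56), then restore it.
mini = resize(full[y1:y2, x1:x2].astype(float), (56, 56), order=1) > 0.5
restored = resize(mini.astype(float), (y2 - y1, x2 - x1), order=1) > 0.5

expanded = np.zeros_like(full)
expanded[y1:y2, x1:x2] = restored
print(mini.shape, np.mean(expanded == full))   # (56, 56) and ~1.0 agreement
```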
+ """ + mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) + for i in range(mask.shape[-1]): + m = mini_mask[:, :, i] + y1, x1, y2, x2 = bbox[i][:4] + h = y2 - y1 + w = x2 - x1 + # Resize with bilinear interpolation + m = resize(m, (h, w)) + mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) + return mask + + +# TODO: Build and use this function to reduce code duplication +def mold_mask(mask, config): + pass + + +def unmold_mask(mask, bbox, image_shape): + """Converts a mask generated by the neural network to a format similar + to its original shape. + mask: [height, width] of type float. A small, typically 28x28 mask. + bbox: [y1, x1, y2, x2]. The box to fit the mask in. + Returns a binary mask with the same size as the original image. + """ + threshold = 0.5 + y1, x1, y2, x2 = bbox + mask = resize(mask, (y2 - y1, x2 - x1)) + mask = np.where(mask >= threshold, 1, 0).astype(np.bool) + + # Put the mask in the right location. + full_mask = np.zeros(image_shape[:2], dtype=np.bool) + full_mask[y1:y2, x1:x2] = mask + return full_mask + + +############################################################ +# Anchors +############################################################ + +def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): + """ + scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] + ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] + shape: [height, width] spatial shape of the feature map over which + to generate anchors. + feature_stride: Stride of the feature map relative to the image in pixels. + anchor_stride: Stride of anchors on the feature map. For example, if the + value is 2 then generate anchors for every other feature map pixel. + """ + # Get all combinations of scales and ratios + scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) + scales = scales.flatten() + ratios = ratios.flatten() + + # Enumerate heights and widths from scales and ratios + heights = scales / np.sqrt(ratios) + widths = scales * np.sqrt(ratios) + + # Enumerate shifts in feature space + shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride + shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride + shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) + + # Enumerate combinations of shifts, widths, and heights + box_widths, box_centers_x = np.meshgrid(widths, shifts_x) + box_heights, box_centers_y = np.meshgrid(heights, shifts_y) + + # Reshape to get a list of (y, x) and a list of (h, w) + box_centers = np.stack( + [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) + box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) + + # Convert to corner coordinates (y1, x1, y2, x2) + boxes = np.concatenate([box_centers - 0.5 * box_sizes, + box_centers + 0.5 * box_sizes], axis=1) + return boxes + + +def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, + anchor_stride): + """Generate anchors at different levels of a feature pyramid. Each scale + is associated with a level of the pyramid, but each ratio is used in + all levels of the pyramid. + Returns: + anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted + with the same order of the given scales. So, anchors of scale[0] come + first, then anchors of scale[1], and so on. 
+ """ + # Anchors + # [anchor_count, (y1, x1, y2, x2)] + anchors = [] + for i in range(len(scales)): + anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], + feature_strides[i], anchor_stride)) + return np.concatenate(anchors, axis=0) + + +############################################################ +# Miscellaneous +############################################################ + +def trim_zeros(x): + """It's common to have tensors larger than the available data and + pad with zeros. This function removes rows that are all zeros. + x: [rows, columns]. + """ + assert len(x.shape) == 2 + return x[~np.all(x == 0, axis=1)] + + +def compute_matches(gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold=0.5, score_threshold=0.0): + """Finds matches between prediction and ground truth instances. + Returns: + gt_match: 1-D array. For each GT box it has the index of the matched + predicted box. + pred_match: 1-D array. For each predicted box, it has the index of + the matched ground truth box. + overlaps: [pred_boxes, gt_boxes] IoU overlaps. + """ + # Trim zero padding + # TODO: cleaner to do zero unpadding upstream + gt_boxes = trim_zeros(gt_boxes) + gt_masks = gt_masks[..., :gt_boxes.shape[0]] + pred_boxes = trim_zeros(pred_boxes) + pred_scores = pred_scores[:pred_boxes.shape[0]] + # Sort predictions by score from high to low + indices = np.argsort(pred_scores)[::-1] + pred_boxes = pred_boxes[indices] + pred_class_ids = pred_class_ids[indices] + pred_scores = pred_scores[indices] + pred_masks = pred_masks[..., indices] + + # Compute IoU overlaps [pred_masks, gt_masks] + overlaps = compute_overlaps_masks(pred_masks, gt_masks) + + # Loop through predictions and find matching ground truth boxes + match_count = 0 + pred_match = -1 * np.ones([pred_boxes.shape[0]]) + gt_match = -1 * np.ones([gt_boxes.shape[0]]) + for i in range(len(pred_boxes)): + # Find best matching ground truth box + # 1. Sort matches by score + sorted_ixs = np.argsort(overlaps[i])[::-1] + # 2. Remove low scores + low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] + if low_score_idx.size > 0: + sorted_ixs = sorted_ixs[:low_score_idx[0]] + # 3. Find the match + for j in sorted_ixs: + # If ground truth box is already matched, go to next one + if gt_match[j] > -1: + continue + # If we reach IoU smaller than the threshold, end the loop + iou = overlaps[i, j] + if iou < iou_threshold: + break + # Do we have a match? + if pred_class_ids[i] == gt_class_ids[j]: + match_count += 1 + gt_match[j] = i + pred_match[i] = j + break + + return gt_match, pred_match, overlaps + + +def compute_ap(gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold=0.5): + """Compute Average Precision at a set IoU threshold (default 0.5). + Returns: + mAP: Mean Average Precision + precisions: List of precisions at different class score thresholds. + recalls: List of recall values at different class score thresholds. + overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
+ """ + # Get matches and overlaps + gt_match, pred_match, overlaps = compute_matches( + gt_boxes, gt_class_ids, gt_masks, + pred_boxes, pred_class_ids, pred_scores, pred_masks, + iou_threshold) + + # Compute precision and recall at each prediction box step + precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) + recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) + + # Pad with start and end values to simplify the math + precisions = np.concatenate([[0], precisions, [0]]) + recalls = np.concatenate([[0], recalls, [1]]) + + # Ensure precision values decrease but don't increase. This way, the + # precision value at each recall threshold is the maximum it can be + # for all following recall thresholds, as specified by the VOC paper. + for i in range(len(precisions) - 2, -1, -1): + precisions[i] = np.maximum(precisions[i], precisions[i + 1]) + + # Compute mean AP over recall range + indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 + mAP = np.sum((recalls[indices] - recalls[indices - 1]) * + precisions[indices]) + + return mAP, precisions, recalls, overlaps + + +def compute_ap_range(gt_box, gt_class_id, gt_mask, + pred_box, pred_class_id, pred_score, pred_mask, + iou_thresholds=None, verbose=1): + """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" + # Default is 0.5 to 0.95 with increments of 0.05 + iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) + + # Compute AP over range of IoU thresholds + AP = [] + for iou_threshold in iou_thresholds: + ap, precisions, recalls, overlaps =\ + compute_ap(gt_box, gt_class_id, gt_mask, + pred_box, pred_class_id, pred_score, pred_mask, + iou_threshold=iou_threshold) + if verbose: + print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) + AP.append(ap) + AP = np.array(AP).mean() + if verbose: + print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( + iou_thresholds[0], iou_thresholds[-1], AP)) + return AP + + +def compute_recall(pred_boxes, gt_boxes, iou): + """Compute the recall at the given IoU threshold. It's an indication + of how many GT boxes were found by the given prediction boxes. + pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates + gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates + """ + # Measure overlaps + overlaps = compute_overlaps(pred_boxes, gt_boxes) + iou_max = np.max(overlaps, axis=1) + iou_argmax = np.argmax(overlaps, axis=1) + positive_ids = np.where(iou_max >= iou)[0] + matched_gt_boxes = iou_argmax[positive_ids] + + recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] + return recall, positive_ids + + +# ## Batch Slicing +# Some custom layers support a batch size of 1 only, and require a lot of work +# to support batches greater than 1. This function slices an input tensor +# across the batch dimension and feeds batches of size 1. Effectively, +# an easy way to support batches > 1 quickly with little code modification. +# In the long run, it's more efficient to modify the code to support large +# batches and getting rid of this function. Consider this a temporary solution +def batch_slice(inputs, graph_fn, batch_size, names=None): + """Splits inputs into slices and feeds each slice to a copy of the given + computation graph and then combines the results. It allows you to run a + graph on a batch of inputs even if the graph is written to support one + instance only. + inputs: list of tensors. All must have the same first dimension length + graph_fn: A function that returns a TF tensor that's part of a graph. 
+ batch_size: number of slices to divide the data into. + names: If provided, assigns names to the resulting tensors. + """ + if not isinstance(inputs, list): + inputs = [inputs] + + outputs = [] + for i in range(batch_size): + inputs_slice = [x[i] for x in inputs] + output_slice = graph_fn(*inputs_slice) + if not isinstance(output_slice, (tuple, list)): + output_slice = [output_slice] + outputs.append(output_slice) + # Change outputs from a list of slices where each is + # a list of outputs to a list of outputs and each has + # a list of slices + outputs = list(zip(*outputs)) + + if names is None: + names = [None] * len(outputs) + + result = [tf.stack(o, axis=0, name=n) + for o, n in zip(outputs, names)] + if len(result) == 1: + result = result[0] + + return result + + +def download_trained_weights(coco_model_path, verbose=1): + """Download COCO trained weights from Releases. + coco_model_path: local path of COCO trained weights + """ + if verbose > 0: + print("Downloading pretrained model to " + coco_model_path + " ...") + with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: + shutil.copyfileobj(resp, out) + if verbose > 0: + print("... done downloading pretrained model!") + + +def norm_boxes(boxes, shape): + """Converts boxes from pixel coordinates to normalized coordinates. + boxes: [N, (y1, x1, y2, x2)] in pixel coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. + Returns: + [N, (y1, x1, y2, x2)] in normalized coordinates + """ + h, w = shape + scale = np.array([h - 1, w - 1, h - 1, w - 1]) + shift = np.array([0, 0, 1, 1]) + return np.divide((boxes - shift), scale).astype(np.float32) + + +def denorm_boxes(boxes, shape): + """Converts boxes from normalized coordinates to pixel coordinates. + boxes: [N, (y1, x1, y2, x2)] in normalized coordinates + shape: [..., (height, width)] in pixels + Note: In pixel coordinates (y2, x2) is outside the box. But in normalized + coordinates it's inside the box. + Returns: + [N, (y1, x1, y2, x2)] in pixel coordinates + """ + h, w = shape + scale = np.array([h - 1, w - 1, h - 1, w - 1]) + shift = np.array([0, 0, 1, 1]) + return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) + + +def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, + preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None): + """A wrapper for Scikit-Image resize(). + Scikit-Image generates warnings on every call to resize() if it doesn't + receive the right parameters. The right parameters depend on the version + of skimage. This solves the problem by using different parameters per + version. And it provides a central place to control resizing defaults. + """ + if LooseVersion(skimage.__version__) >= LooseVersion("0.14"): + # New in 0.14: anti_aliasing. Default it to False for backward + # compatibility with skimage 0.13. 
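batch_slice() above unrolls the batch dimension in Python, applies graph_fn to one sample at a time, and re-stacks the outputs. Under TF2 eager execution the effect is easy to see on a toy tensor; this sketch assumes the src.tracker.mrcnn.utils import path used elsewhere in this repo.

```python
import tensorflow as tf
from src.tracker.mrcnn import utils

boxes = tf.constant([[[0., 0., 1., 1.], [0., 0., 2., 2.]],
                     [[1., 1., 3., 3.], [0., 0., 4., 4.]]])   # [batch=2, N=2, 4]

# graph_fn sees one sample at a time ([N, 4]) even though the input is batched.
areas = utils.batch_slice(
    boxes,
    lambda b: (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]),
    batch_size=2)

print(areas.numpy())   # [[ 1.  4.] [ 4. 16.]]
```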
+ return skimage.transform.resize( + image, output_shape, + order=order, mode=mode, cval=cval, clip=clip, + preserve_range=preserve_range, anti_aliasing=anti_aliasing, + anti_aliasing_sigma=anti_aliasing_sigma) + else: + return skimage.transform.resize( + image, output_shape, + order=order, mode=mode, cval=cval, clip=clip, + preserve_range=preserve_range) \ No newline at end of file diff --git a/src/tracker/requirements-gpu.txt b/src/tracker/requirements-gpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..3aaa0e1f3e55b0d796e7803a383869fa0401e7dd --- /dev/null +++ b/src/tracker/requirements-gpu.txt @@ -0,0 +1,8 @@ +tensorflow-gpu==2.3.0rc0 +opencv-python==4.1.1.26 +lxml +tqdm +absl-py +matplotlib +easydict +pillow \ No newline at end of file diff --git a/src/tracker/requirements.txt b/src/tracker/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..39f8d6c1a5e96be15ba592d094af7129d838b40f --- /dev/null +++ b/src/tracker/requirements.txt @@ -0,0 +1,29 @@ +# pip install -U -r requirements.txt +# deepsort +Cython +matplotlib>=3.2.2 +numpy>=1.18.5 +pillow +easydict +# pycocotools>=2.0 +PyYAML>=5.3 +scipy>=1.4.1 +tensorboard>=2.2 +#torch>=1.6.0 +#torchvision>=0.7.0 +tqdm>=4.41.0 + +# maskrcnn +scikit-image +keras>=2.0.8 +h5py +imgaug +IPython[all] + +# signboard segment +pytorch-lightning==1.6.5 + +opencv-python +lxml +tensorflow-gpu==2.7.0 +absl-py diff --git a/src/tracker/setup.cfg b/src/tracker/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0c61b02b26f817040d23411350c76d767d8827b8 --- /dev/null +++ b/src/tracker/setup.cfg @@ -0,0 +1,4 @@ +[metadata] +description-file = README.md +license-file = LICENSE +requirements-file = requirements.txt \ No newline at end of file diff --git a/src/tracker/setup.py b/src/tracker/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..572e743ae21512968fffca8c48a945d50df33bee --- /dev/null +++ b/src/tracker/setup.py @@ -0,0 +1,67 @@ +""" +The build/compilations setup +>> pip install -r requirements.txt +>> python setup.py install +""" +import pip +import logging +import pkg_resources +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + + +def _parse_requirements(file_path): + pip_ver = pkg_resources.get_distribution('pip').version + pip_version = list(map(int, pip_ver.split('.')[:2])) + if pip_version >= [6, 0]: + raw = pip.req.parse_requirements(file_path, + session=pip.download.PipSession()) + else: + raw = pip.req.parse_requirements(file_path) + return [str(i.req) for i in raw] + + +# parse_requirements() returns generator of pip.req.InstallRequirement objects +try: + install_reqs = _parse_requirements("requirements.txt") +except Exception: + logging.warning('Fail load requirements file, so using default ones.') + install_reqs = [] + +setup( + name='mask-rcnn', + version='2.1', + url='https://github.com/matterport/Mask_RCNN', + author='Matterport', + author_email='waleed.abdulla@gmail.com', + license='MIT', + description='Mask R-CNN for object detection and instance segmentation', + packages=["mrcnn"], + install_requires=install_reqs, + include_package_data=True, + python_requires='>=3.4', + long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow. +The model generates bounding boxes and segmentation masks for each instance of an object in the image. 
+It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Image Recognition", + "Topic :: Scientific/Engineering :: Visualization", + "Topic :: Scientific/Engineering :: Image Segmentation", + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + ], + keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras", +) \ No newline at end of file diff --git a/src/tracker/show_results.py b/src/tracker/show_results.py new file mode 100644 index 0000000000000000000000000000000000000000..a185a128ef6e3da6fa0c81ba7fb54855ef4bbea0 --- /dev/null +++ b/src/tracker/show_results.py @@ -0,0 +1,110 @@ +import argparse + +import cv2 +import numpy as np + +import deep_sort_app +from deep_sort.iou_matching import iou +from application_util import visualization + + +DEFAULT_UPDATE_MS = 20 + + +def run(sequence_dir, result_file, show_false_alarms=False, detection_file=None, + update_ms=None, video_filename=None): + """Run tracking result visualization. + Parameters + ---------- + sequence_dir : str + Path to the MOTChallenge sequence directory. + result_file : str + Path to the tracking output file in MOTChallenge ground truth format. + show_false_alarms : Optional[bool] + If True, false alarms are highlighted as red boxes. + detection_file : Optional[str] + Path to the detection file. + update_ms : Optional[int] + Number of milliseconds between cosecutive frames. Defaults to (a) the + frame rate specifid in the seqinfo.ini file or DEFAULT_UDPATE_MS ms if + seqinfo.ini is not available. + video_filename : Optional[Str] + If not None, a video of the tracking results is written to this file. + """ + seq_info = deep_sort_app.gather_sequence_info(sequence_dir, detection_file) + results = np.loadtxt(result_file, delimiter=',') + + if show_false_alarms and seq_info["groundtruth"] is None: + raise ValueError("No groundtruth available. Cannot show false alarms.") + + def frame_callback(vis, frame_idx): + print("Frame idx", frame_idx) + image = cv2.imread( + seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR) + + vis.set_image(image.copy()) + + if seq_info["detections"] is not None: + detections = deep_sort_app.create_detections( + seq_info["detections"], frame_idx) + vis.draw_detections(detections) + + mask = results[:, 0].astype(np.int) == frame_idx + track_ids = results[mask, 1].astype(np.int) + boxes = results[mask, 2:6] + vis.draw_groundtruth(track_ids, boxes) + + if show_false_alarms: + groundtruth = seq_info["groundtruth"] + mask = groundtruth[:, 0].astype(np.int) == frame_idx + gt_boxes = groundtruth[mask, 2:6] + for box in boxes: + # NOTE(nwojke): This is not strictly correct, because we don't + # solve the assignment problem here. 
+ min_iou_overlap = 0.5 + if iou(box, gt_boxes).max() < min_iou_overlap: + vis.viewer.color = 0, 0, 255 + vis.viewer.thickness = 4 + vis.viewer.rectangle(*box.astype(np.int)) + + if update_ms is None: + update_ms = seq_info["update_ms"] + if update_ms is None: + update_ms = DEFAULT_UPDATE_MS + visualizer = visualization.Visualization(seq_info, update_ms) + if video_filename is not None: + visualizer.viewer.enable_videowriter(video_filename) + visualizer.run(frame_callback) + + +def parse_args(): + """ Parse command line arguments. + """ + parser = argparse.ArgumentParser(description="Siamese Tracking") + parser.add_argument( + "--sequence_dir", help="Path to the MOTChallenge sequence directory.", + default=None, required=True) + parser.add_argument( + "--result_file", help="Tracking output in MOTChallenge file format.", + default=None, required=True) + parser.add_argument( + "--detection_file", help="Path to custom detections (optional).", + default=None) + parser.add_argument( + "--update_ms", help="Time between consecutive frames in milliseconds. " + "Defaults to the frame_rate specified in seqinfo.ini, if available.", + default=None) + parser.add_argument( + "--output_file", help="Filename of the (optional) output video.", + default=None) + parser.add_argument( + "--show_false_alarms", help="Show false alarms as red bounding boxes.", + type=bool, default=False) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + run( + args.sequence_dir, args.result_file, args.show_false_alarms, + args.detection_file, args.update_ms, args.output_file) \ No newline at end of file diff --git a/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/datamodule.cpython-38.pyc b/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/datamodule.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35b159c615f7607ecb57c8bac1558ebda1a738bb Binary files /dev/null and b/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/datamodule.cpython-38.pyc differ diff --git a/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/dataset.cpython-38.pyc b/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..307ec6bff31601176bb685f2af11777a89668d18 Binary files /dev/null and b/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/dataset.cpython-38.pyc differ diff --git a/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/utils.cpython-38.pyc b/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c9a14eaaa50a08c5971032e07a1d03b08d64c376 Binary files /dev/null and b/src/tracker/signboard_segment/datasets_signboard_detection/__pycache__/utils.cpython-38.pyc differ diff --git a/src/tracker/signboard_segment/datasets_signboard_detection/datamodule.py b/src/tracker/signboard_segment/datasets_signboard_detection/datamodule.py new file mode 100644 index 0000000000000000000000000000000000000000..4fb4044e74148d498237abb0b786a5a55eee3648 --- /dev/null +++ b/src/tracker/signboard_segment/datasets_signboard_detection/datamodule.py @@ -0,0 +1,40 @@ +import pytorch_lightning as pl +from torch.utils.data import DataLoader +from torchvision import transforms +from src.tracker.signboard_segment.datasets_signboard_detection.dataset import PoIDataset +import 
src.tracker.signboard_segment.datasets_signboard_detection.utils as utils
+
+
+class POIDataModule(pl.LightningDataModule):
+    def __init__(self,
+                 data,
+                 train_batch_size=16,
+                 test_batch_size=16,
+                 seed=42):
+        super().__init__()
+        self.data = data
+        self.train_batch_size = train_batch_size
+        self.test_batch_size = test_batch_size
+        self.seed = seed
+
+    def prepare_data(self):
+        pass
+
+    def setup(self, stage="fit"):
+        transform = [transforms.ToTensor()]
+        test_transform = transforms.Compose(transform)
+        if stage == "predict" or stage is None:
+            self.test_dataset = PoIDataset(self.data,
+                                           transforms=test_transform)
+
+    def predict_dataloader(self):
+        if self.test_dataset is not None:
+            return DataLoader(self.test_dataset,
+                              batch_size=self.test_batch_size,
+                              shuffle=False,
+                              num_workers=16,
+                              collate_fn=utils.collate_fn)
+
+    def _get_name(self, filepath):
+        images = filepath
+        return images
diff --git a/src/tracker/signboard_segment/datasets_signboard_detection/dataset.py b/src/tracker/signboard_segment/datasets_signboard_detection/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..154077c1654b3b1fd6afef16ddab07bdd951da42
--- /dev/null
+++ b/src/tracker/signboard_segment/datasets_signboard_detection/dataset.py
@@ -0,0 +1,31 @@
+from torch.utils.data import Dataset
+class Labelizer():
+    def __init__(self):
+        super().__init__()
+        self.labels = {'background': 0, 'signboard': 1}
+        self.inv_labels = {0: 'background', 1: 'signboard'}
+    def transform(self, label):
+        return self.labels[label]
+
+    def inverse_transform(self, ys):
+        return self.inv_labels[ys]
+
+    def num_classes(self):
+        return len(self.labels)
+
+class PoIDataset(Dataset):
+    def __init__(self,
+                 data,
+                 transforms=None):
+        self.data = data
+        self.transforms = transforms
+
+    def __len__(self):
+        return 1
+
+    def __getitem__(self, idx):
+        image = self.data
+        target = {}
+        if self.transforms is not None:
+            image = self.transforms(image)
+        return image, target
\ No newline at end of file
diff --git a/src/tracker/signboard_segment/datasets_signboard_detection/utils.py b/src/tracker/signboard_segment/datasets_signboard_detection/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..82ae79bc3fba0e9968c247a17de708e16b764068
--- /dev/null
+++ b/src/tracker/signboard_segment/datasets_signboard_detection/utils.py
@@ -0,0 +1,324 @@
+from collections import defaultdict, deque
+import datetime
+import pickle
+import time
+
+import torch
+import torch.distributed as dist
+
+import errno
+import os
+
+
+class SmoothedValue(object):
+    """Track a series of values and provide access to smoothed values over a
+    window or the global series average.
+    """
+
+    def __init__(self, window_size=20, fmt=None):
+        if fmt is None:
+            fmt = "{median:.4f} ({global_avg:.4f})"
+        self.deque = deque(maxlen=window_size)
+        self.total = 0.0
+        self.count = 0
+        self.fmt = fmt
+
+    def update(self, value, n=1):
+        self.deque.append(value)
+        self.count += n
+        self.total += value * n
+
+    def synchronize_between_processes(self):
+        """
+        Warning: does not synchronize the deque!
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.tensor([tensor.numel()], device="cuda") + size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) + if local_size != max_size: + padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.cuda.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {} ({:.4f} s / it)'.format( + header, total_time_str, total_time / len(iterable))) + + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): + + def f(x): + if x >= warmup_iters: + return 1 + alpha = float(x) / warmup_iters + return warmup_factor * (1 - alpha) + alpha + + return torch.optim.lr_scheduler.LambdaLR(optimizer, f) + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, 
**kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) diff --git a/src/tracker/signboard_segment/det_models/__pycache__/backbone.cpython-38.pyc b/src/tracker/signboard_segment/det_models/__pycache__/backbone.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fee3afbd73a60d0152e8c65af551307e66c7f41 Binary files /dev/null and b/src/tracker/signboard_segment/det_models/__pycache__/backbone.cpython-38.pyc differ diff --git a/src/tracker/signboard_segment/det_models/__pycache__/inference_signboard_detection.cpython-38.pyc b/src/tracker/signboard_segment/det_models/__pycache__/inference_signboard_detection.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43173d3c0c873eef0a1597e28199c55192bcc737 Binary files /dev/null and b/src/tracker/signboard_segment/det_models/__pycache__/inference_signboard_detection.cpython-38.pyc differ diff --git a/src/tracker/signboard_segment/det_models/__pycache__/model.cpython-38.pyc b/src/tracker/signboard_segment/det_models/__pycache__/model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c3912906dc096795174d749efe14da88dfd4e38 Binary files /dev/null and b/src/tracker/signboard_segment/det_models/__pycache__/model.cpython-38.pyc differ diff --git a/src/tracker/signboard_segment/det_models/backbone.py b/src/tracker/signboard_segment/det_models/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..052d8ec0ee9620b0877675194297413ea177a42c --- /dev/null +++ b/src/tracker/signboard_segment/det_models/backbone.py @@ -0,0 +1,45 @@ +import torchvision.models.detection as models +from torchvision.models.detection.retinanet import RetinaNetClassificationHead +from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor + +def set_parameter_requires_grad(model, + tune_only: bool = False): + if tune_only: + for child in list(model.children()): + for param in child.parameters(): + param.requires_grad = False + + +def initialize_model(model_name: str, + num_classes: int, + tune_only: bool = False, + use_pretrained: bool = True): + input_size = 
0 + + model = getattr(models, model_name, lambda: None) + model_ft = model(pretrained=use_pretrained) + set_parameter_requires_grad(model_ft, tune_only) + + if model_name.startswith("maskrcnn"): + print("Using Mask RCNN") + mask_predictor_in_channels = 256 + mask_dim_reduced = 256 + model_ft.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels, mask_dim_reduced, num_classes) + + elif model_name.startswith("fasterrcnn"): + print("Using Mask Faster RCNN") + from torchvision.models.detection.faster_rcnn import FastRCNNPredictor + # get number of input features for the classifier + in_features = model_ft.roi_heads.box_predictor.cls_score.in_features + model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) + + elif model_name.startswith("retinanet"): + print("Using RetinaNet") + in_channels = model_ft.head.classification_head.cls_logits.in_channels + num_anchors = model_ft.head.classification_head.num_anchors + # replace the pre-trained head with a new one + model_ft.head.classification_head = RetinaNetClassificationHead(in_channels, num_anchors, num_classes) + else: + raise ValueError("{0} is not supported!".format(model_name)) + + return model_ft, input_size \ No newline at end of file diff --git a/src/tracker/signboard_segment/det_models/inference_signboard_detection.py b/src/tracker/signboard_segment/det_models/inference_signboard_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..69570b7054ce1b2bd77f52915a2d8e23f3f1f5c0 --- /dev/null +++ b/src/tracker/signboard_segment/det_models/inference_signboard_detection.py @@ -0,0 +1,77 @@ +import numpy as np +import pytorch_lightning as pl +from src.tracker.signboard_segment.datasets_signboard_detection.dataset import Labelizer + +class Color_convert(): + def __init__(self): + super().__init__() + self.labels = {'signboard': "red"} + + def transform(self, label): + return self.labels[label] + + def num_classes(self): + return len(self.labels) + +class POIDetectionTask(pl.LightningModule): + def __init__(self, + model): + super().__init__() + + self.model = model + self.output = [] + self.pred_boxes = [] + self.pred_masks = [] + self.pred_scores = [] + self.pred_labels = [] + self.labelizer = Labelizer() + self.color_convert = Color_convert() + self.num_sticker = 0 + + def forward(self, x): + output = self.model(x) + return output + + def predict_step(self, test_batch, batch_idx): + images, targets = test_batch + outputs = self(images) + for target in outputs: + boxes = target['boxes'] + masks = target['masks'] + scores = target['scores'] + labels = target['labels'] + boxes = boxes.numpy() + masks = masks.numpy() + scores = scores.numpy() + labels = labels.numpy() + select_boxes = [] + select_masks = [] + select_scores = [] + select_labels = [] + for i in range(0,len(scores)): + if (scores[i]>0.7): + select_boxes.append(boxes[i].tolist()) + select_masks.extend(masks[i].tolist()) + select_scores.append(scores[i]) + select_labels.append(labels[i]) + select_boxes = [select_boxes] + select_masks = [select_masks] + select_scores = [select_scores] + select_labels = [select_labels] + self.pred_boxes = np.array(select_boxes, dtype=np.int32) + self.pred_masks = np.array(select_masks, dtype=np.uint8) + self.pred_scores = np.array(select_scores, dtype=np.float32) + self.pred_labels = np.array(select_labels, dtype=np.int32) + + def on_predict_end(self): + pred_boxes = self.pred_boxes[0] + pred_masks = self.pred_masks[0] + pred_scores = self.pred_scores[0] + pred_labels = self.pred_labels[0] + + 
self.output = { + 'rois': pred_boxes, + 'masks': pred_masks, + 'scores': pred_scores, + 'class_ids': pred_labels + } diff --git a/src/tracker/signboard_segment/det_models/model.py b/src/tracker/signboard_segment/det_models/model.py new file mode 100644 index 0000000000000000000000000000000000000000..686fa0b07dc0154d4833c6dda05ab70d1dd28f0c --- /dev/null +++ b/src/tracker/signboard_segment/det_models/model.py @@ -0,0 +1,21 @@ +import pytorch_lightning as pl +from src.tracker.signboard_segment.det_models.backbone import initialize_model + +class POIDetection(pl.LightningModule): + def __init__(self, + n_classes, + **kwargs): + super().__init__() + self.save_hyperparameters() + self.model, _ = initialize_model(kwargs["backbone"], + n_classes, + tune_only=kwargs["tune_fc_only"]) + + def forward(self, images, targets=None): + images = list(image for image in images) + if targets is not None : + targets = [{k: v for k, v in t.items()} for t in targets] + outputs = self.model(images, targets) + else: + outputs = self.model(images) + return outputs diff --git a/src/tracker/signboard_segment/main.py b/src/tracker/signboard_segment/main.py new file mode 100644 index 0000000000000000000000000000000000000000..e2be6785dec9bda9f4ac0767d97967423bf9a4b5 --- /dev/null +++ b/src/tracker/signboard_segment/main.py @@ -0,0 +1,86 @@ +from signboard_detect import inference_signboard +import os +import argparse +import tqdm +import cv2 +import numpy as np +from PIL import Image + +def compose(output, mask): + h,w = mask.shape + + for i in range(0, h): + for j in range(0,w): + if (mask[i,j] > 0.5): + output[i,j] = 255 + return output + +def get_parser(): + parser = argparse.ArgumentParser(description="Signboard Detection") + + parser.add_argument("--input", + type=str, + default="./images", + help="A list of space separated input images") + parser.add_argument("--output", + type=str, + default="./output/output_signboard", + help="A list of array of segmentation") + parser.add_argument("--checkpoint", + type=str, + default="./checkpoints/ss/ss.ckpt", + help="File path to best model checkpoint") + + args = parser.parse_args() + return args + + +def handle(args): + if args.input: + if os.path.isdir(args.input): + args.input = [os.path.join(args.input, fname) + for fname in os.listdir(args.input)] + elif os.path.isfile(args.input): + args.input = [args.input] + + for path in tqdm.tqdm(args.input): + print(path) + img = cv2.imread(path) + + image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + im_pil = Image.fromarray(image) + + dimensions = img.shape + hei, wid = dimensions[0], dimensions[1] + print(hei, wid) + result = inference_signboard(im_pil, args.checkpoint) + print(" **************** Result **************** ") + print(result['rois'].shape) + print(result['masks'].shape) + print(result['class_ids'].shape) + print(result['scores'].shape) + print(" **************************************** ") + for box in result['rois']: + box = box.tolist() + image = cv2.rectangle(img, (int(box[0]), int( + box[1])), (int(box[2]), int(box[3])), (255, 0, 0), 2) + root_ext = os.path.splitext(path) + output_path = os.path.join(args.output, root_ext[0] + "_output" + root_ext[1]) + cv2.imwrite(output_path, image) + + img_output = np.zeros((hei,wid), dtype="uint8") + for j in range(0,len(result['masks'])): + mask = result['masks'][j] + im_np = np.array(mask) + img_output = compose(img_output, im_np) + output_path = os.path.join(args.output, root_ext[0] + "_mask" + root_ext[1]) + cv2.imwrite(output_path, img_output) + + +def main(): + args = 
get_parser() + handle(args) + + +if __name__ == "__main__": + main() diff --git a/src/tracker/signboard_segment/signboard_detect.py b/src/tracker/signboard_segment/signboard_detect.py new file mode 100644 index 0000000000000000000000000000000000000000..f0be85e37a4f34912f077b4827cb5104288c7ac7 --- /dev/null +++ b/src/tracker/signboard_segment/signboard_detect.py @@ -0,0 +1,22 @@ +import pytorch_lightning as pl +from det_models.model import POIDetection +from datasets_signboard_detection.datamodule import POIDataModule +from det_models.inference_signboard_detection import POIDetectionTask + +def load_model(checkpoint_path): + model = POIDetection.load_from_checkpoint(checkpoint_path=checkpoint_path) + return model + + +def inference_signboard(image, checkpoint): + + dm = POIDataModule(data=image, seed=42) + dm.setup("predict") + + model = load_model(checkpoint) + task = POIDetectionTask(model) + + # accelerator='gpu', devices=1 + trainer = pl.Trainer(gpus=0, max_epochs=-1) + trainer.predict(task, datamodule=dm) + return task.output diff --git a/src/tracker/signboard_track.py b/src/tracker/signboard_track.py new file mode 100644 index 0000000000000000000000000000000000000000..fcfef96ef5a5dd129853ad2f0beefc17bea53290 --- /dev/null +++ b/src/tracker/signboard_track.py @@ -0,0 +1,213 @@ +from src.tracker.mrcnn.mrcnn_color import MRCNN +from src.tracker._tools_ import generate_detections as gdet +from src.tracker.deep_sort.tracker import Tracker +from src.tracker.deep_sort.detection import Detection +from src.tracker.application_util import preprocessing +from src.tracker.deep_sort import nn_matching +import matplotlib.pyplot as plt +import numpy as np +import cv2 +from PIL import Image +import tensorflow as tf +import time +import ffmpeg +import os + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + + +physical_devices = tf.config.experimental.list_physical_devices('GPU') +if len(physical_devices) > 0: + tf.config.experimental.set_memory_growth(physical_devices[0], True) + +# deep sort imports +# deepsort + + +def check_rotation(path_video_file): + print(path_video_file) + meta_dict = ffmpeg.probe(path_video_file) + try: + if int(meta_dict['streams'][0]['tags']['rotate']) == 90: + return cv2.ROTATE_90_CLOCKWISE + elif int(meta_dict['streams'][0]['tags']['rotate']) == 180: + return cv2.ROTATE_180 + elif int(meta_dict['streams'][0]['tags']['rotate']) == 270: + return cv2.ROTATE_90_COUNTERCLOCKWISE + except: + return None + + +def correct_rotation(frame, rotateCode): + return cv2.rotate(frame, rotateCode) + + +class SignboardTracker(): + def __init__(self, + detector_checkpoint: str = "./checkpoints/ss/ss.ckpt", + input_size: int = 1024, + score: float = 0.7, + size: int = 1024, + video: str = "", + output: str = "", + output_format: str = "", + dont_show: bool = True, + info: bool = True, + count: bool = True, + + max_cosine_distance: float = 0.4, + nn_budget: None = None, + nms_max_overlap: float = 1.0, + + tracker_checkpoint: str = "./checkpoints/tracker/signboard_2793.pb" + ) -> None: + self.detector_checkpoint = detector_checkpoint + self.input_size = input_size + self.score = score + self.size = size + self.video = video + self.output = output + self.output_format = output_format + self.dont_show = dont_show + self.info = info + self.count = count + + self.max_cosine_distance = max_cosine_distance + self.nn_budget = nn_budget + self.nms_max_overlap = nms_max_overlap + + self.tracker_checkpoint = tracker_checkpoint + + self.load_tracker() + self.load_detector() + + def load_tracker(self): + 
self.encoder = gdet.create_box_encoder(self.tracker_checkpoint, batch_size=1) + metric = nn_matching.NearestNeighborDistanceMetric("cosine", self.max_cosine_distance, self.nn_budget) + self.tracker = Tracker(metric) + + def load_detector(self): + self.mrcnn = MRCNN(self.detector_checkpoint, self.input_size, self.score) + + def inference_signboard(self, fps_target, video_path, output, output_format, output_frames): + results = {} + results_ = {} + + rotateCode = check_rotation(video_path) + try: + vid = cv2.VideoCapture(int(video_path)) + except: + vid = cv2.VideoCapture(video_path) + + out = None + + # get video ready to save locally if flag is set + if output: + # by default VideoCapture returns float instead of int + width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(vid.get(cv2.CAP_PROP_FPS)) + codec = cv2.VideoWriter_fourcc(*output_format) + out = cv2.VideoWriter(output, codec, fps_target, (width, height)) + tg = (fps - (fps%fps_target)) / fps_target + frame_num = 0 + # while video is running + while True: + return_value, frame = vid.read() + # original = frame + if return_value: + frame_num += 1 + if rotateCode is not None: + frame = correct_rotation(frame, rotateCode) + image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + image = Image.fromarray(image) + else: + break + if (frame_num%tg)==0: + if str(frame_num) not in results: + results[str(frame_num)] = [] + + start_time = time.time() + + boxes, scores, class_names, class_ids, class_color = self.mrcnn.detect_result_(image, min_score=0.4) + + count = len(class_names) + + # encode yolo detections and feed to tracker + features = self.encoder(frame, boxes) + detections = [Detection(box, score, class_name, feature) for box, score, class_name, feature in zip(boxes, scores, class_names, features)] + + # initialize color map + cmap = plt.get_cmap('tab20b') + colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] + + # run non-maxima supression + boxs = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + classes = np.array([d.class_name for d in detections]) + indices = preprocessing.non_max_suppression(boxs, classes, self.nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + # Call the tracker + self.tracker.predict() + self.tracker.update(detections) + + # update tracks + # with open(f"{output_frames}/{frame_num}.txt", "a+", encoding="utf-8") as ff: + for track in self.tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + bbox = track.to_tlbr() + class_name = track.get_class() + + # crop to ids folder + ids_path = f"{output_frames}" + # print(ids_path) + if not os.path.isdir(ids_path): + os.makedirs(ids_path) + crop_ids = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] + # num_ids = 0 + + # if os.path.isfile(os.path.join(ids_path, str(track.track_id) + "_" + str(frame_num)+".png")): + # num_ids += 1 + final_ids_path = os.path.join(ids_path, str(track.track_id) + "_" + str(frame_num)+".png") + try: + cv2.imwrite(final_ids_path, crop_ids) + except Exception as e: + print(e) + + # draw bbox on screen + color = colors[int(track.track_id) % len(colors)] + color = [i * 255 for i in color] + cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) + cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1) + cv2.putText(frame, class_name + "-" + 
str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75, (255, 255, 255), 2) + + # if enable info flag then print details about each track + results[str(frame_num)].append({ + "id": track.track_id, + "class": class_name, + "box": [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] + }) + + if str(track.track_id) not in results_: + results_[str(track.track_id)] = [] + results_[str(track.track_id)].append({ + "frame": frame_num, + "class": class_name, + "box": [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] + }) + # ff.close() + + # calculate frames per second of running detections + # fps = 1.0 / (time.time() - start_time) + result = frame + + # if output flag is set, save video file + if output: + cv2.imwrite(f"{output}/{frame_num}.jpg", result) + out.write(result) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + cv2.destroyAllWindows() + return [results, results_] \ No newline at end of file diff --git a/static/css/.gitkeep b/static/css/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/templates/detector.html b/templates/detector.html new file mode 100644 index 0000000000000000000000000000000000000000..641f83199567d5f659ed3e73acc561ce7612e1e4 --- /dev/null +++ b/templates/detector.html @@ -0,0 +1,15 @@ + + + + + Python Flask - Video Upload and Play Example + + + +
+ + +
+ + + \ No newline at end of file diff --git a/templates/tracker.html b/templates/tracker.html new file mode 100644 index 0000000000000000000000000000000000000000..cf50b361e75aef0c3bee712b4dbbc1ed90566ce9 --- /dev/null +++ b/templates/tracker.html @@ -0,0 +1,15 @@ + + + + + Python Flask - Video Upload and Play Example + + + +
+ + +
+ + + \ No newline at end of file diff --git a/utils/.gitkeep b/utils/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/utils/image.py b/utils/image.py new file mode 100644 index 0000000000000000000000000000000000000000..ab441e24965999f18f37609e03bb469cf610a141 --- /dev/null +++ b/utils/image.py @@ -0,0 +1,25 @@ +import os +import cv2 +from src.ss.ss import handle_ss +from src.sts.demo.sts import handle_sts +from src.ir.ir import handle_ir + + +def create_folder(dir_path): + if not os.path.isdir(dir_path): + os.mkdir(dir_path) + return + + +def parse(image, filename): + dir_path = os.path.basename(filename).split('.')[0] + dir_path = os.path.join("./outputs", dir_path) + create_folder(dir_path) + filepath = os.path.join(dir_path, filename) + image.save(filepath) + segment_path, segment_array = handle_ss(filepath, dir_path) + output_path_box, output_path_text, output_path_visual, dict_box_sign_out, dict_rec_sign_out = handle_sts( + filepath, segment_path, dir_path) + predicted = handle_ir(filepath, dict_rec_sign_out, dir_path) + print(predicted) + return diff --git a/utils/utils.py b/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3e16d102e9397582aa5eacc91a3a456c3c87d32e --- /dev/null +++ b/utils/utils.py @@ -0,0 +1,37 @@ +import ffmpeg +import cv2 +import os + +ALLOWED_EXTENSIONS = set(['txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif']) + + +def allowed_file(filename): + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + + +def check_rotation(path_video_file): + meta_dict = ffmpeg.probe(path_video_file) + rotateCode = None + print(int(meta_dict['streams'][0]['tags']['rotate'])) + if int(meta_dict['streams'][0]['tags']['rotate']) == 90: + rotateCode = cv2.ROTATE_180 + elif int(meta_dict['streams'][0]['tags']['rotate']) == 180: + rotateCode = cv2.ROTATE_90_COUNTERCLOCKWISE + elif int(meta_dict['streams'][0]['tags']['rotate']) == 270: + rotateCode = cv2.ROTATE_90_CLOCKWISE + return rotateCode + + +def correct_rotation(frame, rotateCode): + return cv2.rotate(frame, rotateCode) + + +def create_folder(dir_path): + if not os.path.isdir(dir_path): + os.mkdir(dir_path) + subfolder = ['frames', 'ss', 'sts', 'ir'] + subfolder = [os.path.join(dir_path, subf) for subf in subfolder] + for subf in subfolder: + if not os.path.isdir(subf): + os.mkdir(subf) + return subfolder diff --git a/utils/video.py b/utils/video.py new file mode 100644 index 0000000000000000000000000000000000000000..a0d30963de660c77f4d892e851f0d231ff371606 --- /dev/null +++ b/utils/video.py @@ -0,0 +1,64 @@ +import os +import cv2 +from src.ss.ss import handle_ss +from src.sts.demo.sts import handle_sts +from src.ir.ir import handle_ir +from utils.utils import correct_rotation, create_folder + + +def gen_frames(video, dir_path, rotateCode): + subpath = create_folder(dir_path) + yield b'--frame\r\n' + count = 0 + while True: + ret, frame = video.read() + # if frame is read correctly ret is True + if rotateCode is not None: + frame = correct_rotation(frame, rotateCode) + if ret: + print('Read a new frame: ', ret) + img_path = os.path.join(subpath[0], "frames_%d.jpg" % count) + cv2.imwrite(img_path, frame) + segment_path, segment_array = handle_ss(img_path, subpath[1]) + output_path_box, output_path_text, output_path_visual, dict_box_sign_out, dict_rec_sign_out = handle_sts( + img_path, segment_path, subpath[2]) + predicted = handle_ir(img_path, dict_rec_sign_out, subpath[3]) + print(predicted) + 
+        else:
+            print("Can't receive frame (stream end?). Exiting ...")
+            break
+        count = count + 1
+        ret, jpeg = cv2.imencode('.jpg', frame)
+        frame = jpeg.tobytes()
+        yield (b'--frame\r\n'
+               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
+
+
+def gen_videos(file_in, file_out):
+    cap = cv2.VideoCapture(file_in)
+
+    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float `width`
+    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+    fps = cap.get(cv2.CAP_PROP_FPS)
+
+    print(width, height, fps)
+
+    # Define the codec and create VideoWriter object sized to the input frames
+    fourcc = cv2.VideoWriter_fourcc(*'XVID')
+    out = cv2.VideoWriter(file_out, fourcc, 20.0, (int(width), int(height)))
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            print("Can't receive frame (stream end?). Exiting ...")
+            break
+        frame = cv2.flip(frame, 0)
+        # write the flipped frame
+        out.write(frame)
+        cv2.imshow('frame', frame)
+        if cv2.waitKey(1) == ord('q'):
+            break
+    # Release everything if job is finished
+    cap.release()
+    out.release()
+    cv2.destroyAllWindows()
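
The diff above adds a single-image signboard-detection entry point (`signboard_detect.inference_signboard`) that wires `POIDataModule`, `POIDetection` and `POIDetectionTask` together. Below is a minimal usage sketch, not a definitive recipe: it assumes the code is run from the repository root, reuses the checkpoint path that appears in the diff (`./checkpoints/ss/ss.ckpt`), and uses a purely illustrative input image path.
```python
# Minimal sketch of single-image signboard detection using the classes added in this diff.
# Assumptions: run from the repo root, checkpoint at ./checkpoints/ss/ss.ckpt (as in the diff),
# and a hypothetical input image path.
from PIL import Image
import pytorch_lightning as pl

from src.tracker.signboard_segment.datasets_signboard_detection.datamodule import POIDataModule
from src.tracker.signboard_segment.det_models.model import POIDetection
from src.tracker.signboard_segment.det_models.inference_signboard_detection import POIDetectionTask

image = Image.open("static/uploads/example.jpg").convert("RGB")  # hypothetical input

dm = POIDataModule(data=image, seed=42)
dm.setup("predict")

model = POIDetection.load_from_checkpoint(checkpoint_path="./checkpoints/ss/ss.ckpt")
task = POIDetectionTask(model)

# The diff uses gpus=0 (CPU); on newer Lightning versions use accelerator="cpu" instead.
trainer = pl.Trainer(gpus=0, max_epochs=-1)
trainer.predict(task, datamodule=dm)

result = task.output  # {'rois': ..., 'masks': ..., 'scores': ..., 'class_ids': ...}
print(result["rois"].shape, result["scores"])
```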
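
`POIDataModule.predict_dataloader` passes `utils.collate_fn` to the `DataLoader`. That function is just `tuple(zip(*batch))`: it transposes a list of `(image, target)` pairs into one tuple of images and one tuple of targets, which is the batch shape torchvision detection models expect. A toy illustration, with strings standing in for tensors and target dicts:
```python
# Toy illustration of collate_fn from datasets_signboard_detection/utils.py.
def collate_fn(batch):
    return tuple(zip(*batch))

batch = [("img_0", {"boxes": []}), ("img_1", {"boxes": []})]  # stand-ins for (tensor, dict) pairs
images, targets = collate_fn(batch)
print(images)   # ('img_0', 'img_1')
print(targets)  # ({'boxes': []}, {'boxes': []})
```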
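
`SmoothedValue` in the same `utils.py` keeps a fixed-length deque for windowed statistics plus running totals for a global average, and `MetricLogger` simply groups several of them under names. A small sketch of how the windowed and global views differ; the window size and values below are arbitrary:
```python
# Windowed vs. global statistics in SmoothedValue (from datasets_signboard_detection/utils.py).
from src.tracker.signboard_segment.datasets_signboard_detection.utils import SmoothedValue

loss = SmoothedValue(window_size=3, fmt="{median:.2f} ({global_avg:.2f})")
for v in (1.0, 2.0, 3.0, 10.0):
    loss.update(v)

print(loss.median)      # 3.0 -> median over the last 3 updates only
print(loss.global_avg)  # 4.0 -> mean over all 4 updates
print(str(loss))        # "3.00 (4.00)" using the fmt string above
```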
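
`POIDetectionTask.predict_step` keeps only detections whose score exceeds 0.7, using an explicit Python loop over the model outputs. The same selection rule can be written as a NumPy boolean mask; the sketch below uses made-up boxes, scores, and labels purely to show the rule, and is not the diff's exact code path.
```python
# Score-threshold filtering (threshold 0.7, as in POIDetectionTask.predict_step),
# expressed with a NumPy boolean mask over illustrative detections.
import numpy as np

scores = np.array([0.95, 0.66, 0.81, 0.40], dtype=np.float32)
boxes = np.array([[0, 0, 10, 10], [5, 5, 20, 20], [2, 2, 8, 8], [1, 1, 3, 3]], dtype=np.int32)
labels = np.array([1, 1, 1, 1], dtype=np.int32)

keep = scores > 0.7
print(boxes[keep])                 # the two boxes above the threshold
print(scores[keep], labels[keep])  # their scores and class ids
```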
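
`compose()` in `src/tracker/signboard_segment/main.py` merges per-instance masks into one binary image by looping over every pixel. An equivalent vectorized form (same 0.5 threshold, same 255 fill) is sketched below as an optional alternative; it is not what the diff ships.
```python
# Vectorized equivalent of compose() from main.py: pixels with mask value > 0.5 become 255.
import numpy as np

def compose_vectorized(output: np.ndarray, mask: np.ndarray) -> np.ndarray:
    output[mask > 0.5] = 255
    return output

canvas = np.zeros((2, 4), dtype="uint8")
mask = np.array([[0.9, 0.1, 0.0, 0.6],
                 [0.2, 0.8, 0.0, 0.0]])
print(compose_vectorized(canvas, mask))
# [[255   0   0 255]
#  [  0 255   0   0]]
```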
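
`SignboardTracker.inference_signboard` does not run detection and tracking on every frame: it computes a stride `tg = (fps - (fps % fps_target)) / fps_target` and only processes frames where `frame_num % tg == 0`, which gives roughly `fps_target` processed frames per second of video. A small worked example with illustrative frame rates:
```python
# Worked example of the frame-sampling stride used in SignboardTracker.inference_signboard.
def sampling_stride(fps: int, fps_target: int) -> float:
    return (fps - (fps % fps_target)) / fps_target

fps_target = 5  # illustrative target rate
for fps in (30, 29, 25):
    tg = sampling_stride(fps, fps_target)
    processed = [n for n in range(1, fps + 1) if n % tg == 0]
    print(f"fps={fps}: stride={tg}, frames processed in the first second: {processed}")
# fps=30 -> stride 6.0 -> frames 6, 12, 18, 24, 30
# fps=29 -> stride 5.0 -> frames 5, 10, 15, 20, 25
# fps=25 -> stride 5.0 -> frames 5, 10, 15, 20, 25
```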